diff --git a/.gitignore b/.gitignore
index 9efed50..000f169 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,5 +9,5 @@ TestResults
 *.dotCover
 *.nupkg
 build
-
-.vs
+util/
+.vs/
diff --git a/.vs/Piglet/v15/sqlite3/storage.ide b/.vs/Piglet/v15/sqlite3/storage.ide
new file mode 100644
index 0000000..53759e5
Binary files /dev/null and b/.vs/Piglet/v15/sqlite3/storage.ide differ
diff --git a/Demo/Debug/CoreCompileInputs.cache b/Demo/Debug/CoreCompileInputs.cache
new file mode 100644
index 0000000..70db212
--- /dev/null
+++ b/Demo/Debug/CoreCompileInputs.cache
@@ -0,0 +1 @@
+14c86c5107a4d04c0343e28e8f15d438958d165b
diff --git a/Demo/Debug/DesignTimeResolveAssemblyReferencesInput.cache b/Demo/Debug/DesignTimeResolveAssemblyReferencesInput.cache
new file mode 100644
index 0000000..afe1f75
Binary files /dev/null and b/Demo/Debug/DesignTimeResolveAssemblyReferencesInput.cache differ
diff --git a/Demo/Demo.cs b/Demo/Demo.cs
index 04e8c13..f36c4a8 100644
--- a/Demo/Demo.cs
+++ b/Demo/Demo.cs
@@ -1,19 +1,137 @@
-using System;
-using Piglet.Demo.Lexer;
-using Piglet.Demo.Parser;
-
-namespace Piglet.Demo
-{
-    public class Demo
-    {
-        public static void Main(string[] args)
-        {
-            // Simple demo runner
-            WordsAndNumbers.Run();
-            Movement.Run();
-            JsonParser.Run();
-            BlogFormatParser.RunFluent();
-            //Console.ReadKey();
-        }
-    }
-}
+using System;
+using Piglet.Parser.Configuration.Generic;
+using Piglet.Parser.Configuration;
+using Piglet.Parser.Construction;
+using Piglet.Demo.Parser;
+using Piglet.Demo.Lexer;
+using System.Linq;
+using Piglet.Parser;
+
+namespace Piglet.Demo
+{
+    using f = Func;
+
+    /// <summary>
+    /// Demo entry point: configures two expression parsers and prints the result of the second one.
+    /// </summary>
+    public class Demo
+    {
+        /// <summary>
+        /// Builds an integer calculator grammar (+, -, *, /, ^ and parentheses) and parses a sample with it,
+        /// then parses a second sample with <see cref="test_lexer"/> and writes the input and the result to the console.
+        /// </summary>
+        /// <param name="args">Command line arguments (not used).</param>
+        public static void Main(string[] args)
+        {
+            // Integer power by repeated multiplication; Enumerable.Range requires a non-negative exponent.
+            static int Exp(int a, int x) => Enumerable.Range(0, x).Aggregate(1, (acc, i) => acc * a);
+            var configurator = ParserFactory.Configure();
+
+            INonTerminal expr = configurator.CreateNonTerminal();
+            INonTerminal term = configurator.CreateNonTerminal();
+            INonTerminal factor = configurator.CreateNonTerminal();
+            INonTerminal expexpr = configurator.CreateNonTerminal();
+            ITerminal number = configurator.CreateTerminal("\\d+", t => int.Parse(t, System.Globalization.CultureInfo.InvariantCulture));
+            ITerminal add = configurator.CreateTerminal("\\+");
+            ITerminal sub = configurator.CreateTerminal("-");
+            ITerminal mul = configurator.CreateTerminal("\\*");
+            ITerminal div = configurator.CreateTerminal("/");
+            ITerminal pow = configurator.CreateTerminal("[\\^]");
+
+            // Grammar layering expr -> term -> expexpr -> factor encodes the operator precedence.
+            expr.AddProduction(expr, add, term).SetReduceFunction(s => s[0] + s[2]);
+            expr.AddProduction(expr, sub, term).SetReduceFunction(s => s[0] - s[2]);
+            expr.AddProduction(term).SetReduceFunction(s => s[0]);
+
+            term.AddProduction(term, mul, expexpr).SetReduceFunction(s => s[0] * s[2]);
+            term.AddProduction(term, div, expexpr).SetReduceFunction(s => s[0] / s[2]);
+            term.AddProduction(expexpr).SetReduceFunction(s => s[0]);
+
+            // Exponentiation binds tighter than multiplication and division.
+            expexpr.AddProduction(expexpr, pow, factor).SetReduceFunction(s => Exp(s[0], s[2]));
+            expexpr.AddProduction(factor).SetReduceFunction(s => s[0]);
+
+            factor.AddProduction(number).SetReduceFunction(s => s[0]);
+            factor.AddProduction("(", expr, ")").SetReduceFunction(s => s[1]);
+
+            var parser = configurator.CreateParser();
+            var value = parser.Parse("3^4 + 1");
+
+            var s = @"x + -5 - 3 * -x * x";
+            var par = new test_lexer().CreateParser();
+            var f = par.Parse(s);
+
+            Console.WriteLine(s);
+            Console.WriteLine(f);
+
+            // NOTE(review): this early return makes the demo runner below unreachable - looks like a debugging leftover, confirm.
+            return;
+
+            // Simple demo runner
+            WordsAndNumbers.Run();
+            Movement.Run();
+            JsonParser.Run();
+            BlogFormatParser.RunFluent();
+            //Console.ReadKey();
+        }
+
+        /// <summary>
+        /// Parser constructor for arithmetic expressions over the single variable <c>x</c>.
+        /// Every expression reduces to a pair of its parenthesised text and a delegate that evaluates it.
+        /// </summary>
+        private class test_lexer
+            : ParserConstructor<(string, f)>
+        {
+            /**/
+            protected override void Construct(NonTerminalWrapper<(string, f)> nt_func)
+            {
+                var nt_expr = CreateNonTerminal<(string s, f f)>();
+                var nt_unop = CreateNonTerminal>();
+                var nt_binop = CreateNonTerminal>();
+
+                // Literals are parsed culture-invariantly, consistent with the calculator grammar in Main.
+                var t_lit = CreateTerminal(@"\d+", x => (i: int.Parse(x, System.Globalization.CultureInfo.InvariantCulture), x));
+                var t_var = CreateTerminal(@"x", x => x);
+                var t_add = CreateTerminal(@"\+", x => x);
+                var t_mul = CreateTerminal(@"\*", x => x);
+                var t_sub = CreateTerminal(@"-", x => x);
+                var t_div = CreateTerminal(@"/", x => x);
+                var t_mod = CreateTerminal(@"%", x => x);
+                var t_oparen = CreateTerminal(@"\(");
+                var t_cparen = CreateTerminal(@"\)");
+
+                SetPrecedenceList(
+                    (AssociativityDirection.Left, new[] { t_add, t_sub }),
+                    (AssociativityDirection.Left, new[] { t_mul, t_div, t_mod })
+                    // (AssociativityDirection.Right, new[] { t_pow })
+                );
+
+                var prec_b = SetAssociativity(AssociativityDirection.Left);
+                var prec_u = SetAssociativity(AssociativityDirection.Right);
+
+                CreateProduction(nt_func, nt_expr);
+                CreateProduction(nt_unop, t_add, _ => f1 => ($"(+{f1.s})", f1.f));
+                // Unary minus has to negate the operand; previously it evaluated to the operand unchanged.
+                CreateProduction(nt_unop, t_sub, _ => f1 => ($"(-{f1.s})", x => -f1.f(x)));
+                CreateProduction(nt_binop, t_mod, _ => (f1, f2) => ($"({f1.s} % {f2.s})", x => f1.f(x) % f2.f(x)));
+                CreateProduction(nt_binop, t_div, _ => (f1, f2) => ($"({f1.s} / {f2.s})", x => f1.f(x) / f2.f(x)));
+                CreateProduction(nt_binop, t_mul, _ => (f1, f2) => ($"({f1.s} * {f2.s})", x => f1.f(x) * f2.f(x)));
+                CreateProduction(nt_binop, t_sub, _ => (f1, f2) => ($"({f1.s} - {f2.s})", x => f1.f(x) - f2.f(x)));
+                CreateProduction(nt_binop, t_add, _ => (f1, f2) => ($"({f1.s} + {f2.s})", x => f1.f(x) + f2.f(x)));
+                CreateProduction(nt_expr, t_var, l => ("X", x => x));
+                CreateProduction(nt_expr, t_lit, l => (l.x, _ => l.i));
+                CreateProduction(nt_expr, t_oparen, nt_expr, t_cparen, (x, y, z) => ($"({y.s})", u => y.f(u)));
+                CreateProduction(nt_expr, nt_unop, nt_expr, (x, y) => x(y)).SetPrecedence(prec_u);
+                CreateProduction(nt_expr, nt_expr, nt_binop, nt_expr, (x, y, z) => y(x, z)).SetPrecedence(prec_b);
+
+                // NOTE(review): in usual regex semantics [^(\*/)] is a character class, so block comments containing ( ) * or / would not be skipped - verify.
+                Configurator.LexerSettings.Ignore = new[]
+                {
+                    @"\s+",
+                    @"/\*[^(\*/)]*\*/",
+                    @"//[^\n]*\n"
+                };
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Demo/Lexer/Movement.cs b/Demo/Lexer/Movement.cs
index 06421f3..8fb31d5 100644
--- a/Demo/Lexer/Movement.cs
+++
b/Demo/Lexer/Movement.cs @@ -1,5 +1,6 @@ using System; using Piglet.Lexer; +using Piglet.Lexer.Runtime; namespace Piglet.Demo.Lexer { @@ -37,9 +38,9 @@ public static void Run() configurator.Ignore(@"\s+"); }); - foreach (var token in lexer.Tokenize("up down left right right north west left north up")) + foreach ((int number, LexedToken token) in lexer.Tokenize("up down left right right north west left north up")) { - Console.WriteLine("{0} Current position is {1},{2}", token.Item2, positionX, positionY); + Console.WriteLine("{0} Current position is {1},{2}", token.SymbolValue, positionX, positionY); } Console.WriteLine(System.DateTime.Now.Ticks - ticks); diff --git a/Demo/Lexer/WordsAndNumbers.cs b/Demo/Lexer/WordsAndNumbers.cs index b3cf0d2..5f10204 100644 --- a/Demo/Lexer/WordsAndNumbers.cs +++ b/Demo/Lexer/WordsAndNumbers.cs @@ -1,5 +1,6 @@ using System; using Piglet.Lexer; +using Piglet.Lexer.Runtime; namespace Piglet.Demo.Lexer { @@ -22,17 +23,9 @@ public static void Run() // Run the lexer string input = "10 piglets 5 boars 1 big sow"; - foreach (var token in lexer.Tokenize(input)) - { - if (token.Item2 is int) - { - Console.WriteLine("Lexer found an integer {0}", token.Item2); - } - else - { - Console.WriteLine("Lexer found a string {0}", token.Item2); - } - } + + foreach ((int number, LexedToken token) in lexer.Tokenize(input)) + Console.WriteLine($"Lexer found {(token.SymbolValue is int ? 
"an integer" : "a string")} {token.SymbolValue}"); } } } diff --git a/Demo/Piglet.Demo.csproj b/Demo/Piglet.Demo.csproj index 5e0819d..444d79d 100644 --- a/Demo/Piglet.Demo.csproj +++ b/Demo/Piglet.Demo.csproj @@ -1,67 +1,16 @@  - - - Debug - AnyCPU - 8.0.30703 - 2.0 - {B23B66AD-EA0F-4254-BDE6-C4EF883BD5B0} - Exe - Properties - Piglet.Demo - Piglet.Demo - v4.0 - 512 - - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - - - Piglet.Demo.Demo - - - - - - - - - - - - - - - - - - - - - {1E23F251-94E1-4504-81E8-2618F5C9FEA3} - Piglet - - - - + + + netcoreapp3.1 + 8.0.30703 + 2.0 + Exe + Piglet.Demo + Piglet.Demo + bin\Debug\ + false + + + + \ No newline at end of file diff --git a/Demo/Properties/AssemblyInfo.cs b/Demo/Properties/AssemblyInfo.cs deleted file mode 100644 index 0198f32..0000000 --- a/Demo/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,25 +0,0 @@ -using System.Reflection; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. 
-[assembly: AssemblyTitle("Demo")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("70c31d99-7aa8-43ea-b4cc-a97bcf3d0ae8")] - -// [assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Piglet")] -[assembly: AssemblyCopyright("Copyright © 2012")] -// [assembly: AssemblyTrademark("")] -// [assembly: AssemblyCulture("")] - - -[assembly: AssemblyVersion("1.1.0")] -[assembly: AssemblyFileVersion("1.1.0")] \ No newline at end of file diff --git a/Demo/app.config b/Demo/app.config new file mode 100644 index 0000000..245587d --- /dev/null +++ b/Demo/app.config @@ -0,0 +1,3 @@ + + + diff --git a/Piglet.FSharp/FSharpParsingUitilities.fs b/Piglet.FSharp/FSharpParsingUitilities.fs new file mode 100644 index 0000000..c569cb2 --- /dev/null +++ b/Piglet.FSharp/FSharpParsingUitilities.fs @@ -0,0 +1,426 @@ + +/////////////////////////////////////////////////////////////////////// +// AUTOGENERATED 2020-06-11 22:00:50.060361 // +// All your changes to this file will be lost upon re-generation. // +/////////////////////////////////////////////////////////////////////// + +namespace Piglet.Parser.Configuration.FSharp + +open Piglet.Parser.Configuration.Generic +open System + + +type ParserConstructor<'TOut> with + // TODO : ? + end + +/// A module containing F# code extensions for the namespace 'Piglet.Parser.Configuration.Generic'. +[] +module FSharpExtensions = + + /// + /// Creates a new production rule to reduce the given symbol '' into a constant provided by the function ''. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. 
+ let reduce_c (start : NonTerminalWrapper<'a>) func = start.AddProduction().SetReduceFunction(Func<'a>(func)) + + /// + /// Creates a new production rule to reduce the given symbol '' into a constant provided by the function ''. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + let reduce_ci (start : NonTerminalWrapper<'a>) func = ignore <| reduce_c start func + + /// + /// Creates a new production rule to reduce the given symbol '' to the given symbol '' using an identity mapping. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production. + /// The reduced symbol. + /// The newly created production rule. + let reduce_0 (start : NonTerminalWrapper<'a>) symbol = start.AddProduction(symbol).SetReduceToFirst() + + /// + /// Creates a new production rule to reduce the given symbol '' to the given symbol '' using an identity mapping. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduced symbol. + /// The newly created production rule. + let reduce_0i (start : NonTerminalWrapper<'a>) symbol = ignore <| reduce_0 start symbol + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 1 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. 
+ let reduce_1 (start : NonTerminalWrapper<'a>) symbol1 func = + start.AddProduction(symbol1).SetReduceFunction(Func<_, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 1 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_1i (start : NonTerminalWrapper<'a>) symbol1 func = + reduce_1 start symbol1 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 2 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. + let reduce_2 (start : NonTerminalWrapper<'a>) symbol1 symbol2 func = + start.AddProduction(symbol1, symbol2).SetReduceFunction(Func<_, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 2 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_2i (start : NonTerminalWrapper<'a>) symbol1 symbol2 func = + reduce_2 start symbol1 symbol2 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 3 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . 
+ /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. + let reduce_3 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 func = + start.AddProduction(symbol1, symbol2, symbol3).SetReduceFunction(Func<_, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 3 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_3i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 func = + reduce_3 start symbol1 symbol2 symbol3 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 4 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. + let reduce_4 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4).SetReduceFunction(Func<_, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 4 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. 
+ let reduce_4i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 func = + reduce_4 start symbol1 symbol2 symbol3 symbol4 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 5 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. + let reduce_5 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5).SetReduceFunction(Func<_, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 5 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_5i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 func = + reduce_5 start symbol1 symbol2 symbol3 symbol4 symbol5 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 6 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. 
+ let reduce_6 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6).SetReduceFunction(Func<_, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 6 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_6i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 func = + reduce_6 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 7 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. + let reduce_7 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7).SetReduceFunction(Func<_, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 7 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. 
+ let reduce_7i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 func = + reduce_7 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 8 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. + let reduce_8 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8).SetReduceFunction(Func<_, _, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 8 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_8i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 func = + reduce_8 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 9 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. 
+ let reduce_9 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9).SetReduceFunction(Func<_, _, _, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 9 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_9i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 func = + reduce_9 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 10 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. + let reduce_10 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10).SetReduceFunction(Func<_, _, _, _, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 10 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. 
+ /// The newly created production rule. + let reduce_10i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 func = + reduce_10 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 11 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. + let reduce_11 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11).SetReduceFunction(Func<_, _, _, _, _, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 11 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_11i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 func = + reduce_11 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 12 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . 
+ /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. + let reduce_12 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11, symbol12).SetReduceFunction(Func<_, _, _, _, _, _, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 12 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_12i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 func = + reduce_12 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 13 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. 
+ let reduce_13 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11, symbol12, symbol13).SetReduceFunction(Func<_, _, _, _, _, _, _, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 13 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_13i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 func = + reduce_13 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 14 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. 
+ let reduce_14 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 symbol14 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11, symbol12, symbol13, symbol14).SetReduceFunction(Func<_, _, _, _, _, _, _, _, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 14 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_14i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 symbol14 func = + reduce_14 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 symbol14 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 15 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. 
+ let reduce_15 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 symbol14 symbol15 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11, symbol12, symbol13, symbol14, symbol15).SetReduceFunction(Func<_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 15 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_15i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 symbol14 symbol15 func = + reduce_15 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 symbol14 symbol15 func |> ignore + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 16 symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use "|> ignore" or use the function . + /// + /// The start symbol for this production.The reduce function. + /// The newly created production rule. 
+ let reduce_16 (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 symbol14 symbol15 symbol16 func = + start.AddProduction(symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11, symbol12, symbol13, symbol14, symbol15, symbol16).SetReduceFunction(Func<_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given 16 symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_16i (start : NonTerminalWrapper<'a>) symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 symbol14 symbol15 symbol16 func = + reduce_16 start symbol1 symbol2 symbol3 symbol4 symbol5 symbol6 symbol7 symbol8 symbol9 symbol10 symbol11 symbol12 symbol13 symbol14 symbol15 symbol16 func |> ignore diff --git a/Piglet.FSharp/Piglet.FSharp.fsproj b/Piglet.FSharp/Piglet.FSharp.fsproj new file mode 100644 index 0000000..9a79e16 --- /dev/null +++ b/Piglet.FSharp/Piglet.FSharp.fsproj @@ -0,0 +1,31 @@ + + + netcoreapp3.1;netcoreapp3.0;netstandard2.1 + Unknown6656 + Piglet.FSharp + Unknown6656 + [TODO] + https://github.com/unknown6656/Piglet + git + MIT + https://github.com/unknown6656/Piglet + Piglet.FSharp + ..\bin + false + + + + + + + + + + + + + + + + + diff --git a/Piglet.GenericGenerator/Piglet.GenericGenerator.csproj b/Piglet.GenericGenerator/Piglet.GenericGenerator.csproj new file mode 100644 index 0000000..614cf83 --- /dev/null +++ b/Piglet.GenericGenerator/Piglet.GenericGenerator.csproj @@ -0,0 +1,10 @@ + + + Exe + $(ProjectDir)bin + netcoreapp3.1 + false + generator + 
Piglet.GenericGenerator.Program
+
+
diff --git a/Piglet.GenericGenerator/Program.cs b/Piglet.GenericGenerator/Program.cs
new file mode 100644
index 0000000..abdf4a2
--- /dev/null
+++ b/Piglet.GenericGenerator/Program.cs
@@ -0,0 +1,638 @@
+using System.Collections.Generic;
+using System.Linq;
+using System.IO;
+using System;
+
+namespace Piglet.GenericGenerator
+{
+    public static class Program
+    {
+        public static int MAX_SIZE = 16;
+
+
+        /// <summary>
+        /// Generates the C# or F# helper source file at the given target path.
+        /// </summary>
+        /// <param name="args">Target file path, output type ('cs' or 'fs') and an optional number of wrappers to generate.</param>
+        /// <returns>0 on success; -1 when too few arguments or an unknown output type are given.</returns>
+        public static int Main(string[] args)
+        {
+            if (args.Length < 2)
+            {
+                Console.Error.WriteLine($@"Usage:
+    {new FileInfo(typeof(Program).Assembly.Location).Name} <target-file> <type> [size]
+
+Arguments:
+    target-file     The path to the target .cs or .fs file which will contain the generated classes.
+    type            'cs' or 'fs'. This determines it generates the C# or F# helper classes.
+    size            Number of classes/production wrappers to generate.
+");
+
+                return -1;
+            }
+
+            // Validate the output type before touching the file system: previously an unknown type
+            // was only detected after the existing target file had been deleted and re-created empty.
+            string type = args[1].ToLowerInvariant();
+
+            if (type != "cs" && type != "fs")
+            {
+                Console.Error.WriteLine($"Invalid output type '{args[1]}'. Expected 'cs' or 'fs'.");
+
+                return -1;
+            }
+
+            FileInfo target_file = new FileInfo(args[0]);
+            DirectoryInfo parent = target_file.Directory;
+
+            if (parent != null && !parent.Exists)
+                parent.Create();
+
+            if (args.Length > 2 && int.TryParse(args[2], out int size))
+                MAX_SIZE = size;
+
+            if (target_file.Exists)
+                target_file.Delete();
+
+            using FileStream stream = new FileStream(target_file.FullName, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite | FileShare.Delete);
+            using StreamWriter writer = new StreamWriter(stream);
+
+            if (type == "cs")
+                CreateGenericExtensions(writer);
+            else
+                CreateFSharpExtensions(writer);
+
+            writer.Flush();
+            stream.Flush();
+
+            return 0;
+        }
+
+        private static void CreateGenericExtensions(StreamWriter writer)
+        {
+            writer.Write($@"
+///////////////////////////////////////////////////////////////////////
+// AUTOGENERATED {DateTime.Now:yyyy-MM-dd HH:mm:ss.ffffff} //
+// All your changes to this file will be lost upon re-generation.
// +/////////////////////////////////////////////////////////////////////// + +using System.Diagnostics.CodeAnalysis; +using System.Collections.Generic; +using System.Linq; +using System; + +using Piglet.Parser.Construction; +using Piglet.Lexer.Runtime; +using Piglet.Lexer; + + +namespace Piglet.Parser.Configuration.Generic +{{ + /// + /// Represents an abstract generic parser constructor. + /// + /// The parser based on this constructor will return a parsed value of the type . + /// + /// The generic value return type of the parser. + public abstract class ParserConstructor + {{ + private ParserWrapper? _parser = null; + private volatile int _ntcounter = 0; + + /// + /// The parser configuration. + /// + public IParserConfigurator Configurator {{ get; }} + + + /// + /// Creates a new generic parser constructor with the default parser configuration. + /// + public ParserConstructor() + : this(ParserFactory.Configure()) + {{ + }} + + /// + /// Creates a new generic parser constructor with the given parser configuration. + /// + /// Parser configuration. + public ParserConstructor(IParserConfigurator configurator) => Configurator = configurator; + + /// + /// Creates a new non-terminal symbol with the given generic semantic value and name. + /// + /// Generic semantic value stored inside the new non-terminal symbol. + /// The name of the new non-terminal symbol. + /// The newly created non-terminal symbol. + protected NonTerminalWrapper CreateNonTerminal(string name) => new NonTerminalWrapper(Configurator.CreateNonTerminal(name)); + + /// + /// Creates a new non-terminal symbol with the given generic semantic value and the default name for non-terminals (""NT...""). + /// + /// Generic semantic value stored inside the new non-terminal symbol. + /// The newly created non-terminal symbol. 
+ protected NonTerminalWrapper CreateNonTerminal() => CreateNonTerminal($""NT{{++_ntcounter}}""); + + /// + /// Creates a new terminal symbol associated with the given regex string and generic value. + /// + /// Generic semantic value stored inside the new terminal symbol. + /// The regex string associated with the terminal symbol. + /// The value stored inside the new terminal value. + /// The newly created terminal symbol. + protected TerminalWrapper CreateTerminal(string regex, T value) => CreateTerminal(regex, _ => value); + + /// + /// Creates a new terminal symbol associated with the given regex string and the function providing the generic value. + /// + /// Generic semantic value stored inside the new terminal symbol. + /// The regex string associated with the terminal symbol. + /// The function providing the generic value represented by the terminal symbol. + /// The newly created terminal symbol. + protected TerminalWrapper CreateTerminal(string regex, Func func) => new TerminalWrapper(Configurator.CreateTerminal(regex, s => func(s))); + + /// + /// Creates a new terminal symbol associated with the given regex string and the identity function (of the type ). + /// + /// The regex string associated with the terminal symbol. + /// The newly created terminal symbol. + protected TerminalWrapper CreateTerminal(string regex) => new TerminalWrapper(Configurator.CreateTerminal(regex)); + + /// + /// Sets the precedence for all given symbols in ascending order. The first symbol group is therefore considered to have the lowest precedence and the last symbol group the highest precedence. + /// + /// Ordered collection of groups containing a set of symbols with their corresponding associativity. 
+ protected void SetPrecedenceList(params (AssociativityDirection direction, ITerminalWrapper[] symbols)[] groups) + {{ + foreach ((AssociativityDirection d, ITerminalWrapper[] s) in groups) + SetAssociativity(d, s); + }} + + /// + /// Sets the given associativity to all symbols in the given symbol collection. All sybols will be considered to have the same precedence group. + /// + /// Associativity direction. + /// Target symbols. + /// The precedence group associated with the given symbols. + protected IPrecedenceGroup SetAssociativity(AssociativityDirection dir, params ITerminalWrapper[] symbols) + {{ + ITerminal[] arr = symbols.Select(s => s.Symbol).ToArray(); + + switch (dir) + {{ + case AssociativityDirection.Left: + return Configurator.LeftAssociative(arr); + case AssociativityDirection.Right: + return Configurator.RightAssociative(arr); + default: + throw new ArgumentOutOfRangeException(nameof(dir)); + }} + }} + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the given symbol. + ///
+ /// The generic type of . + /// The generic type of . + /// The non-terminal symbol which gets reduced. + /// The symbol, to which gets reduced. + protected void CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0) => non_terminal.AddProduction(symbol0).SetReduceToFirst(); +"); + + for (int i = 0; i <= MAX_SIZE; ++i) + { + string types = string.Concat(range(0, i).Select(x => $", S{x}")); + string ftypes = string.Concat(range(0, i).Select(x => $"S{x}, ")); + string names = string.Join(", ", range(0, i).Select(x => $"symbol{x}")); + string args = string.Concat(range(0, i).Select(x => $", SymbolWrapper symbol{x}")); + + writer.Write($@" + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the {i} given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of ."); + + for (int j = 0; j < i; ++j) + writer.Write($@" + /// The generic type of ."); + + for (int j = 0; j < i; ++j) + writer.Write($@" + /// The symbol no. {j}, to which the current non-terminal symbol gets reduced."); + + writer.Write($@" + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol """". + /// The newly created production rule. + protected ProductionWrapper<{ftypes}T> CreateProduction(NonTerminalWrapper non_terminal{args}, Func<{ftypes}T> func) => non_terminal.AddProduction({names}).SetReduceFunction(func); +"); + } + + writer.Write(@" + /// + /// Creates a new parser based on the current configurator and returns it. + /// + /// Note: Only the first call of creates a new parser. If you wish to reset the generated parser and re-create it, call the method beforehand. + /// + /// The constructed parser. + public ParserWrapper CreateParser() + { + if (_parser is null) + { + NonTerminalWrapper nt = CreateNonTerminal(); + + Construct(nt); + + Configurator.SetStartSymbol((INonTerminal)nt.Symbol); + _parser = new ParserWrapper(Configurator.CreateParser()); + } + + return _parser; + } + + /// + /// Resets the constructed parser to , thereby forcing the parser to be re-constructed based on the current conficuration the next time is called. + /// + public void ResetParser() => _parser = null; + + /// + /// Constructs the parser. This method must be implemented by every constructor based on + /// + /// The non-terminal production start symbol. The value of this symbol will be returned when the constructed parser is executed. + protected abstract void Construct(NonTerminalWrapper start_symbol); + + + /// + /// Represents a wrapper for the generic parser. + /// + public sealed class ParserWrapper + { + /// + /// The internal parser instance. 
+ /// + public IParser Parser { get; } + + + internal ParserWrapper(IParser parser) => Parser = parser; + + /// + /// Tries to parse the given string and returns whether the parsing was successful. + /// + /// The input string. + /// The parsed value. + public bool TryParse(string input, [MaybeNullWhen(false)] out ParserResult? result) + { + try + { + result = Parse(input); + } + catch (LexerException) + { + result = null; + } + + return result is { }; + } + + /// + /// Parses the given string and returns the parsed value of the type . + /// + /// The input string. + /// The parsed value. + public ParserResult Parse(string input) + { + List> tokens = new List>(); + void aggregate(LexedToken token) + { + tokens.Add(token); + + if (token is LexedNonTerminal nt) + foreach (LexedToken child in nt.ChildNodes) + aggregate(child); + } + + aggregate(Parser.ParseTokens(input)); + + return new ParserResult((TOut)tokens[0].SymbolValue, input.Split('\n'), tokens.ToArray()); + } + } + } + + public sealed class ParserResult + { + public TOut ParsedValue { get; } + public string[] SourceLines { get; } + public LexedToken[] LexedTokens { get; } + + + internal ParserResult(TOut parsedValue, string[] sourceLines, LexedToken[] lexedTokens) + { + ParsedValue = parsedValue; + SourceLines = sourceLines; + LexedTokens = lexedTokens; + } + + public static implicit operator TOut(ParserResult res) => res.ParsedValue; + } + + /// + /// An interface for generic terminal wrappers. + /// + public interface ITerminalWrapper + { + /// + /// The underlying terminal symbol. + /// + ITerminal Symbol { get; } + } + + /// + /// Represents a generic symbol wrapper. + /// + /// The generic type stored inside the symbol. + public class SymbolWrapper + { + /// + /// The underlying (boxed) symbol. + /// + public ISymbol Symbol { get; } + /// + /// The type of the generic value stored inside the symbol. 
+ /// + public Type SymbolType => typeof(T); + + + /// + /// Creates a new generic symbol wrapper for the given (boxed) symbol. + /// + /// Boxed symbol. + public SymbolWrapper(ISymbol symbol) => Symbol = symbol; + + /// + public override string? ToString() => Symbol.ToString(); + } + + /// + /// Represents a generic terminal symbol wrapper. + /// + /// + /// The generic type stored inside the symbol. + public sealed class TerminalWrapper + : SymbolWrapper + , ITerminalWrapper + { + /// + ITerminal ITerminalWrapper.Symbol => (ITerminal)Symbol; + + /// + /// Creates a new generic symbol wrapper for the given (boxed) terminal symbol. + /// + /// Boxed terminal symbol. + public TerminalWrapper(ISymbol symbol) + : base(symbol) + { + } + } + + /// + /// Represents a generic non-terminal symbol wrapper. + /// + /// + /// The generic type stored inside the symbol. + public sealed class NonTerminalWrapper + : SymbolWrapper + { + /// + /// Creates a new generic symbol wrapper for the given (boxed) non-terminal symbol. + /// + /// Boxed non-terminal symbol. + public NonTerminalWrapper(ISymbol symbol) + : base(symbol) + { + } + + /// + /// Creates a new (empty) production rule on the current non-terminal symbol and returns it. + /// + /// The newly created production rule. + public ProductionWrapper AddProduction() => new ProductionWrapper(((INonTerminal)Symbol).AddProduction()); +"); + + for (int i = 1; i <= MAX_SIZE; ++i) + { + string types = string.Join(", ", range(0, i).Select(x => $"T{x}")); + string names = string.Join(", ", range(0, i).Select(x => $"sym{x}.Symbol")); + string args = string.Join(", ", range(0, i).Select(x => $"SymbolWrapper sym{x}")); + + writer.Write($@" + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the {i} given symbols. + ///
"); + + for (int j = 0; j < i; ++j) + writer.Write($@" + /// The generic type of ."); + + for (int j = 0; j < i; ++j) + writer.Write($@" + /// The symbol no. {j}, to which the current non-terminal symbol gets reduced."); + + writer.Write($@" + /// The newly created production rule. + public ProductionWrapper<{types}, T> AddProduction<{types}>({args}) => + new ProductionWrapper<{types}, T>(((INonTerminal)Symbol).AddProduction({names})); +"); + } + + writer.Write(@" + } + + /// + /// Represents an abstract generic production wrapper. + /// + /// The generic return type of the production. This is the type stored inside the non-terminal which gets reduced by the production represented by this wrapper. + public abstract class ProductionWrapperBase + where T : ProductionWrapperBase + { + /// + /// The underlying (boxed) production of this wrapper. + /// + public IProduction Production { get; } + + + /// + /// Creates a new abstract generic production wrapper based on the given (boxed) production. + /// + /// Boxed production instance. + public ProductionWrapperBase(IProduction production) => Production = production; + + /// + /// Configures the production to reduce the non-terminal to the first symbol. This is equivalent to with the index 0. + /// + /// The current instance. + public T SetReduceToFirst() + { + Production.SetReduceToFirst(); + + return (T)this; + } + + /// + /// Sets given precedence group to the current production. + /// + /// Precedence group to the assigned to the current production. + /// The current instance. + public T SetPrecedence(IPrecedenceGroup precedence) + { + Production.SetPrecedence(precedence); + + return (T)this; + } + } +"); + for (int i = 1; i <= MAX_SIZE + 1; ++i) + { + IEnumerable types = range(0, i).Select(x => x == i - 1 ? "R" : $"T{x}"); + string typestr = string.Join(", ", types); + + writer.Write($@" + /// + /// Represents a generic reduce function of the type ""({typestr}) -> "". 
+ /// + /// "); + + for (int j = 0; j < i - 1; ++j) + writer.Write($@" + /// The generic input type of the symbol no. {j}."); + + writer.Write($@" + /// The generic return type of the production. + public sealed class ProductionWrapper<{typestr}> + : ProductionWrapperBase> + {{ + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + {{ + }} + + public ProductionWrapper<{typestr}> SetReduceFunction(Func<{typestr}> f) + {{ + Production.SetReduceFunction(args => f({string.Join(", ", types.Take(i - 1).Select((x, i) => $"({x})args[{i}]"))})); + + return this; + }} + }} +"); + } + + writer.WriteLine("}"); + } + + private static void CreateFSharpExtensions(StreamWriter writer) + { + writer.Write($@" +/////////////////////////////////////////////////////////////////////// +// AUTOGENERATED {DateTime.Now:yyyy-MM-dd HH:mm:ss.ffffff} // +// All your changes to this file will be lost upon re-generation. // +/////////////////////////////////////////////////////////////////////// + +namespace Piglet.Parser.Configuration.FSharp + +open Piglet.Parser.Configuration.Generic +open System + + +type ParserConstructor<'TOut> with + // TODO : ? + end + +/// A module containing F# code extensions for the namespace 'Piglet.Parser.Configuration.Generic'. +[] +module FSharpExtensions = + + /// + /// Creates a new production rule to reduce the given symbol '' into a constant provided by the function ''. + /// + /// This function returns the newly created production rule. To discard the return value, either use ""|> ignore"" or use the function . + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. 
+ let reduce_c (start : NonTerminalWrapper<'a>) func = start.AddProduction().SetReduceFunction(Func<'a>(func)) + + /// + /// Creates a new production rule to reduce the given symbol '' into a constant provided by the function ''. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + let reduce_ci (start : NonTerminalWrapper<'a>) func = ignore <| reduce_c start func + + /// + /// Creates a new production rule to reduce the given symbol '' to the given symbol '' using an identity mapping. + /// + /// This function returns the newly created production rule. To discard the return value, either use ""|> ignore"" or use the function . + /// + /// The start symbol for this production. + /// The reduced symbol. + /// The newly created production rule. + let reduce_0 (start : NonTerminalWrapper<'a>) symbol = start.AddProduction(symbol).SetReduceToFirst() + + /// + /// Creates a new production rule to reduce the given symbol '' to the given symbol '' using an identity mapping. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduced symbol. + /// The newly created production rule. + let reduce_0i (start : NonTerminalWrapper<'a>) symbol = ignore <| reduce_0 start symbol +"); + + for (int i = 1; i <= MAX_SIZE; ++i) + { + string[] variables = range(1, i).Select(x => "symbol" + x).ToArray(); + + writer.Write($@" + /// + /// Creates a new production rule to reduce the given symbol '' to the given {i} symbol(s) using the given production function. + /// + /// This function returns the newly created production rule. To discard the return value, either use ""|> ignore"" or use the function . + /// + /// The start symbol for this production.The reduce function. 
+ /// The newly created production rule. + let reduce_{i} (start : NonTerminalWrapper<'a>) {string.Join(" ", variables)} func = + start.AddProduction({string.Join(", ", variables)}).SetReduceFunction(Func<{string.Join(", ", Enumerable.Repeat('_', i))}, 'a>(func)) + + + /// + /// Creates a new production rule to reduce the given symbol '' to the given {i} symbol(s) using the given production function. + /// + /// This function does not returns the newly created production rule. Use the function if you intend to reference the production rule. + /// + /// The start symbol for this production. + /// The reduce function. + /// The newly created production rule. + let reduce_{i}i (start : NonTerminalWrapper<'a>) {string.Join(" ", variables)} func = + reduce_{i} start {string.Join(" ", variables)} func |> ignore +"); + } + } + + private static IEnumerable range(int start, int count) => Enumerable.Range(start, count); + } +} diff --git a/Piglet.Tests/Debug/CoreCompileInputs.cache b/Piglet.Tests/Debug/CoreCompileInputs.cache new file mode 100644 index 0000000..115fac2 --- /dev/null +++ b/Piglet.Tests/Debug/CoreCompileInputs.cache @@ -0,0 +1 @@ +3d2bf29dc6c3ae7d4d4afd18b88ee923ca78b623 diff --git a/Piglet.Tests/Debug/DesignTimeResolveAssemblyReferencesInput.cache b/Piglet.Tests/Debug/DesignTimeResolveAssemblyReferencesInput.cache new file mode 100644 index 0000000..99e6c7a Binary files /dev/null and b/Piglet.Tests/Debug/DesignTimeResolveAssemblyReferencesInput.cache differ diff --git a/Piglet.Tests/Lexer/Construction/DotNotation/TestDotNotation.cs b/Piglet.Tests/Lexer/Construction/DotNotation/TestDotNotation.cs index 5b9577c..f2555fc 100644 --- a/Piglet.Tests/Lexer/Construction/DotNotation/TestDotNotation.cs +++ b/Piglet.Tests/Lexer/Construction/DotNotation/TestDotNotation.cs @@ -12,7 +12,7 @@ public class TestDotNotation public void TestDotForNFA() { // Make sure it does not crash and does not return null. 
- var nfa = NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader("(a|b)+bcd")))); + var nfa = NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader("(a|b)+bcd")), false)); string dotString = nfa.AsDotNotation(null); Assert.IsNotNull(dotString); } @@ -21,7 +21,7 @@ public void TestDotForNFA() public void TestDotForDFA() { // Make sure it does not crash and does not return null. - var dfa = DFA.Create(NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader("(a|b)+bcd"))))); + var dfa = DFA.Create(NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader("(a|b)+bcd")), false))); string dotString = dfa.AsDotNotation(null); Assert.IsNotNull(dotString); } @@ -30,7 +30,7 @@ public void TestDotForDFA() public void Should_be_able_to_mark_the_last_step_as_active_for_DFA() { // Make sure it does not crash and does not return null. - var dfa = DFA.Create(NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader("(a|b)+bcd"))))); + var dfa = DFA.Create(NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader("(a|b)+bcd")), false))); string dotString = dfa.AsDotNotation("abc"); Assert.IsNotNull(dotString); Assert.IsTrue(dotString.Contains("4 [ fillcolor=\"green\" style=\"filled\"]")); @@ -40,7 +40,7 @@ public void Should_be_able_to_mark_the_last_step_as_active_for_DFA() public void Should_be_able_to_mark_active_state_for_NFA() { // Make sure it does not crash and does not return null. 
- var nfa = NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader("(a|b)+bcd")))); + var nfa = NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader("(a|b)+bcd")), false)); string dotString = nfa.AsDotNotation("bbc"); Assert.IsNotNull(dotString); Assert.IsTrue(dotString.Contains("8 [ fillcolor=\"green\" style=\"filled\"]")); @@ -50,7 +50,7 @@ public void Should_be_able_to_mark_active_state_for_NFA() public void Should_return_matched_string_when_successful() { // Make sure it does not crash and does not return null. - var nfa = NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader("(a|b)+bcd")))); + var nfa = NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader("(a|b)+bcd")), false)); var result = nfa.Stimulate("bbc"); Assert.AreEqual("bbc", result.Matched); @@ -60,7 +60,7 @@ public void Should_return_matched_string_when_successful() public void Should_only_return_successfully_matched_string() { // Make sure it will only return part of the string. 
- var nfa = NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader("(a|b)+bcd")))); + var nfa = NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader("(a|b)+bcd")), false)); var result = nfa.Stimulate("bbcxxxx"); Assert.AreEqual("bbc", result.Matched); diff --git a/Piglet.Tests/Lexer/Construction/TestNFA.cs b/Piglet.Tests/Lexer/Construction/TestNFA.cs index c052976..e63632a 100644 --- a/Piglet.Tests/Lexer/Construction/TestNFA.cs +++ b/Piglet.Tests/Lexer/Construction/TestNFA.cs @@ -17,10 +17,7 @@ public void TestConstructWithDigit() Assert.AreEqual(3, nfa.Transitions.Count()); } - private NFA NFACreate(string s) - { - return NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader(s)))); - } + private NFA NFACreate(string s) => NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader(s)), false)); [Test] public void TestRepeat() diff --git a/Piglet.Tests/Lexer/Construction/TestRegEx.cs b/Piglet.Tests/Lexer/Construction/TestRegEx.cs index 82df173..b169c92 100644 --- a/Piglet.Tests/Lexer/Construction/TestRegEx.cs +++ b/Piglet.Tests/Lexer/Construction/TestRegEx.cs @@ -11,11 +11,8 @@ namespace Piglet.Tests.Lexer.Construction [TestFixture] public class TestRegEx { - private IEnumerable>> GetAllConfigurationOptions() - { - return Enum.GetValues(typeof (LexerRuntime)).Cast().Select( + private IEnumerable>> GetAllConfigurationOptions() => Enum.GetValues(typeof(LexerRuntime)).Cast().Select( f => new Action>(c => c.Runtime = f)); - } private IEnumerable> CreateLexers(string regEx) { @@ -29,20 +26,11 @@ private IEnumerable> CreateLexers(string regEx) })); } - private void CheckMatch(string input, string regEx) - { - IsMatch(input, regEx, true); - } + private void CheckMatch(string input, string regEx) => IsMatch(input, regEx, true); - private void CheckMatch(string input, string regEx, string expectedMatch) - { - IsMatch(input, regEx, true, expectedMatch); - } + private void CheckMatch(string input, string regEx, string 
expectedMatch) => IsMatch(input, regEx, true, expectedMatch); - private void IsMatch(string input, string regEx, bool shouldMatch) - { - IsMatch(input, regEx, shouldMatch, input); - } + private void IsMatch(string input, string regEx, bool shouldMatch) => IsMatch(input, regEx, shouldMatch, input); private void IsMatch(string input, string regEx, bool shouldMatch, string matchedInput) { @@ -51,9 +39,9 @@ private void IsMatch(string input, string regEx, bool shouldMatch, string matche var lexerInstance = lexer.Begin(new StringReader(input)); try { - Tuple token = lexerInstance.Next(); + var token = lexerInstance.Next(); Assert.AreEqual(0, token.Item1); - Assert.AreEqual(matchedInput, token.Item2); + Assert.AreEqual(matchedInput, token.Item2.SymbolValue); Assert.IsTrue(shouldMatch); } catch (LexerException) @@ -63,58 +51,31 @@ private void IsMatch(string input, string regEx, bool shouldMatch, string matche } } - private void CheckMatchFail(string input, string regEx) - { - IsMatch(input, regEx, false); - } + private void CheckMatchFail(string input, string regEx) => IsMatch(input, regEx, false); [Test] - public void TestMatchingQuotesWithEscapes() - { - CheckMatch("\" A quoted string with \\\" inside\"", @"""(\\.|[^""])*"""); - } + public void TestMatchingQuotesWithEscapes() => CheckMatch("\" A quoted string with \\\" inside\"", @"""(\\.|[^""])*"""); [Test] - public void TestStuff() - { - CheckMatch("absc", "a(bs|e)*c"); - } + public void TestStuff() => CheckMatch("absc", "a(bs|e)*c"); [Test] - public void TestDeepNestedParenthesis() - { - CheckMatch("abcde", "(a(b)(c(de)))"); - } + public void TestDeepNestedParenthesis() => CheckMatch("abcde", "(a(b)(c(de)))"); [Test] - public void TestEscapedCharacters() - { - CheckMatch("++++", "\\++"); - } + public void TestEscapedCharacters() => CheckMatch("++++", "\\++"); [Test] - public void TestDigit() - { - CheckMatch("123", "\\d+"); - } + public void TestDigit() => CheckMatch("123", "\\d+"); [Test] - public void TestRange() 
- { - CheckMatch("abcde", "[a-e]+"); - } + public void TestRange() => CheckMatch("abcde", "[a-e]+"); [Test] - public void TestMultipleRanges() - { - CheckMatch("abcdePOPP", "[a-eA-Z]+"); - } + public void TestMultipleRanges() => CheckMatch("abcdePOPP", "[a-eA-Z]+"); [Test] - public void TestAnyCharacter() - { - CheckMatch("XHXas!!a.A", "X.X.*\\.A"); - } + public void TestAnyCharacter() => CheckMatch("XHXas!!a.A", "X.X.*\\.A"); [Test] public void TestZeroOrOnce() @@ -124,22 +85,13 @@ public void TestZeroOrOnce() } [Test] - public void TestEscapedParenthesis() - { - CheckMatch("(b)", "\\((a|b)\\)"); - } + public void TestEscapedParenthesis() => CheckMatch("(b)", "\\((a|b)\\)"); [Test] - public void TestParenthesis() - { - CheckMatch("a", "(a)"); - } + public void TestParenthesis() => CheckMatch("a", "(a)"); [Test] - public void TestParenthesisWithRepeat() - { - CheckMatch("a", "(a)+"); - } + public void TestParenthesisWithRepeat() => CheckMatch("a", "(a)+"); [Test] public void TestParenthesisWithAlternate() @@ -149,16 +101,10 @@ public void TestParenthesisWithAlternate() } [Test] - public void TestNegateCharacterClass() - { - CheckMatch("abcd", "[^ABCD]+"); - } + public void TestNegateCharacterClass() => CheckMatch("abcd", "[^ABCD]+"); [Test] - public void TestNegateInWrongPosition() - { - CheckMatch("^", "[x^]"); - } + public void TestNegateInWrongPosition() => CheckMatch("^", "[x^]"); [Test] public void SpecialCharactersAreNotThatSpecialInsideAClass() @@ -170,36 +116,24 @@ public void SpecialCharactersAreNotThatSpecialInsideAClass() [Test] public void TestCommentRegex() { - var lexer = LexerFactory.Configure( - f => - { - f.Token(@";[^\n]*\n", a => a); - f.Token("nextLine", a => a + "%" ); - }); - - var lexerInstance = lexer.Begin(@"; this is a comment -nextLine"); - Assert.AreEqual("; this is a comment\r\n", lexerInstance.Next().Item2); - Assert.AreEqual("nextLine%", lexerInstance.Next().Item2); + var lexer = LexerFactory.Configure(f => { + f.Token(@";[^\n]*\n", a 
=> a); + f.Token("nextLine", a => a + "%" ); + }); + var lexerInstance = lexer.Begin("; this is a comment\r\nnextLine"); + + Assert.AreEqual("; this is a comment\r\n", lexerInstance.Next().token.SymbolValue); + Assert.AreEqual("nextLine%", lexerInstance.Next().token.SymbolValue); } [Test] - public void TestNonDigitEscaped() - { - CheckMatch("abcde", "\\D+"); - } + public void TestNonDigitEscaped() => CheckMatch("abcde", "\\D+"); [Test] - public void TestMatchWhitespace() - { - CheckMatch(" \t\n\r", "\\s+"); - } + public void TestMatchWhitespace() => CheckMatch(" \t\n\r", "\\s+"); [Test] - public void TestMatchNonWhitespace() - { - CheckMatch("jfsdhsd", "\\S+"); - } + public void TestMatchNonWhitespace() => CheckMatch("jfsdhsd", "\\S+"); [Test] [TestCase("abcdefghijklmnopqrstuvwxyz")] @@ -208,17 +142,11 @@ public void TestMatchNonWhitespace() [TestCase("_")] [TestCase("01234567890")] [TestCase("\x16C8\x16C1\x16B7\x16DA\x16D6\x16CF")]//Piglet in Runic - public void TestMatchWordCharactersInclude(string input) - { - CheckMatch(input, "\\w+"); - } + public void TestMatchWordCharactersInclude(string input) => CheckMatch(input, "\\w+"); [Test] [TestCase("-!\\\"#€%&/()='|<>,.*^¨`´?+;:@$")] - public void TestMatchWordCharactersExclude(string input) - { - CheckMatchFail(input, "\\w+"); - } + public void TestMatchWordCharactersExclude(string input) => CheckMatchFail(input, "\\w+"); [Test] public void TestGreedyOr() @@ -228,28 +156,16 @@ public void TestGreedyOr() } [Test] - public void TestMatchNonAlphanumeric() - { - CheckMatch(" \n!@#", "\\W+"); - } + public void TestMatchNonAlphanumeric() => CheckMatch(" \n!@#", "\\W+"); [Test] - public void TestMatchLiteral() - { - CheckMatch("ABC", "ABC"); - } + public void TestMatchLiteral() => CheckMatch("ABC", "ABC"); [Test] - public void TestEscapedSlash() - { - CheckMatch("\\\\", "\\\\+"); - } + public void TestEscapedSlash() => CheckMatch("\\\\", "\\\\+"); [Test] - public void TestBracketInCharacterClass() - { - 
CheckMatch("[][][]", @"[\]\[ab]+"); - } + public void TestBracketInCharacterClass() => CheckMatch("[][][]", @"[\]\[ab]+"); [Test] public void TestExactNumberedRepetition() @@ -276,16 +192,10 @@ public void TestAtLeastComplexRepetition() } [Test] - public void TestMultipleParenthesis() - { - CheckMatch("abcd", "(ab)(cd)"); - } + public void TestMultipleParenthesis() => CheckMatch("abcd", "(ab)(cd)"); [Test] - public void TestNumberedRepetitionWithParenthesis() - { - CheckMatch("coolcoolcool", "(cool){3}"); - } + public void TestNumberedRepetitionWithParenthesis() => CheckMatch("coolcoolcool", "(cool){3}"); [Test] public void TestNumberedRepetitionWithMaxValue() diff --git a/Piglet.Tests/Lexer/Construction/TestStateMinimization.cs b/Piglet.Tests/Lexer/Construction/TestStateMinimization.cs index dc0fd55..05070de 100644 --- a/Piglet.Tests/Lexer/Construction/TestStateMinimization.cs +++ b/Piglet.Tests/Lexer/Construction/TestStateMinimization.cs @@ -11,10 +11,7 @@ namespace Piglet.Tests.Lexer.Construction [TestFixture] public class TestStateMinimization { - private static DFA CreateDfa(string expression) - { - return DFA.Create(NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader(expression))))); - } + private static DFA CreateDfa(string expression) => DFA.Create(NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader(expression)), false))); [Test] public void TestSimpleMinimization() diff --git a/Piglet.Tests/Lexer/Construction/TestUnicodeLexing.cs b/Piglet.Tests/Lexer/Construction/TestUnicodeLexing.cs index e13d4e3..5605747 100644 --- a/Piglet.Tests/Lexer/Construction/TestUnicodeLexing.cs +++ b/Piglet.Tests/Lexer/Construction/TestUnicodeLexing.cs @@ -11,16 +11,16 @@ public class TestUnicodeLexing public void TestUnicode() { var lexer = LexerFactory.Configure(c => - { - c.Token("خنزير صغير", f => "arabic"); - c.Token("nasse", f => "swedish"); - c.Ignore(" "); - c.Runtime = LexerRuntime.Nfa; - }); + { + c.Token("خنزير صغير", f => "arabic"); + 
c.Token("nasse", f => "swedish"); + c.Ignore(" "); + c.Runtime = LexerRuntime.Nfa; + }); var lexerInstance = lexer.Begin("خنزير صغير" + " nasse"); - Assert.AreEqual("arabic", lexerInstance.Next().Item2); - Assert.AreEqual("swedish", lexerInstance.Next().Item2); + Assert.AreEqual("arabic", lexerInstance.Next().token.SymbolValue); + Assert.AreEqual("swedish", lexerInstance.Next().token.SymbolValue); } @@ -36,8 +36,8 @@ public void TestUnicodeDfa() }); var lexerInstance = lexer.Begin("خنزير صغير" + " nasse"); - Assert.AreEqual("arabic", lexerInstance.Next().Item2); - Assert.AreEqual("swedish", lexerInstance.Next().Item2); + Assert.AreEqual("arabic", lexerInstance.Next().token.SymbolValue); + Assert.AreEqual("swedish", lexerInstance.Next().token.SymbolValue); } } diff --git a/Piglet.Tests/Lexer/TestLexer.cs b/Piglet.Tests/Lexer/TestLexer.cs index d402a2c..f8a12c4 100644 --- a/Piglet.Tests/Lexer/TestLexer.cs +++ b/Piglet.Tests/Lexer/TestLexer.cs @@ -20,9 +20,10 @@ public void TestLexerConstruction() c.Token("a*b+", f => "A*B+"); }); var li = lexer.Begin(new StringReader("abb")); - Tuple tuple = li.Next(); + var tuple = li.Next(); + Assert.AreEqual(1, tuple.Item1); - Assert.AreEqual("ABB", tuple.Item2); + Assert.AreEqual("ABB", tuple.token.SymbolValue); } [Test] @@ -36,9 +37,10 @@ public void TestLexerConstructionUsingDfaEngine() c.Runtime = LexerRuntime.Dfa; }); var li = lexer.Begin(new StringReader("abb")); - Tuple tuple = li.Next(); + var tuple = li.Next(); + Assert.AreEqual(1, tuple.Item1); - Assert.AreEqual("ABB", tuple.Item2); + Assert.AreEqual("ABB", tuple.token.SymbolValue); } [Test] @@ -52,9 +54,10 @@ public void TestLexerConstructionUsingNfaEngine() c.Runtime = LexerRuntime.Nfa; }); var li = lexer.Begin(new StringReader("abb")); - Tuple tuple = li.Next(); + var tuple = li.Next(); + Assert.AreEqual(1, tuple.Item1); - Assert.AreEqual("ABB", tuple.Item2); + Assert.AreEqual("ABB", tuple.token.SymbolValue); } [Test] @@ -68,11 +71,12 @@ public void 
TestMinimizationWontMessUpLexing() c.Ignore(" "); }); var li = lexer.Begin("aa aaaaaaa aa aaaa aa"); - Assert.AreEqual("aa", li.Next().Item2); - Assert.AreEqual("a+", li.Next().Item2); - Assert.AreEqual("aa", li.Next().Item2); - Assert.AreEqual("a+", li.Next().Item2); - Assert.AreEqual("aa", li.Next().Item2); + + Assert.AreEqual("aa", li.Next().token.SymbolValue); + Assert.AreEqual("a+", li.Next().token.SymbolValue); + Assert.AreEqual("aa", li.Next().token.SymbolValue); + Assert.AreEqual("a+", li.Next().token.SymbolValue); + Assert.AreEqual("aa", li.Next().token.SymbolValue); } [Test] @@ -86,11 +90,10 @@ public void TestLexerConstructionWithWhitespaceIgnore() c.Ignore(" *"); }); var li = lexer.Begin(new StringReader(" abb bbbbbbbbb")); - - Tuple tuple = li.Next(); - Assert.AreEqual("ABB", tuple.Item2); + var tuple = li.Next(); + Assert.AreEqual("ABB", tuple.token.SymbolValue); tuple = li.Next(); - Assert.AreEqual("A*B+", tuple.Item2); + Assert.AreEqual("A*B+", tuple.token.SymbolValue); } [Test] @@ -103,15 +106,16 @@ public void TestGetsEndOfInputTokenIfIgnoredStuffAtEnd() c.EndOfInputTokenNumber = -1; }); var li = lexer.Begin("bbbbbbaaabbbaaaaabbbb"); - Tuple lexVal = li.Next(); + var lexVal = li.Next(); + Assert.AreEqual(0, lexVal.Item1); - Assert.AreEqual("aaa", lexVal.Item2); + Assert.AreEqual("aaa", lexVal.token.SymbolValue); lexVal = li.Next(); Assert.AreEqual(0, lexVal.Item1); - Assert.AreEqual("aaaaa", lexVal.Item2); + Assert.AreEqual("aaaaa", lexVal.token.SymbolValue); lexVal = li.Next(); Assert.AreEqual(-1, lexVal.Item1); - Assert.AreEqual(null, lexVal.Item2); + Assert.AreEqual(null, lexVal.token.SymbolValue); } [Test] @@ -123,11 +127,11 @@ public void TestLexDigits() c.Ignore(" *"); }); var li = lexer.Begin(new StringReader(" 123 42")); + var tuple = li.Next(); - Tuple tuple = li.Next(); - Assert.AreEqual(123, tuple.Item2); + Assert.AreEqual(123, tuple.token.SymbolValue); tuple = li.Next(); - Assert.AreEqual(42, tuple.Item2); + Assert.AreEqual(42, 
tuple.token.SymbolValue); } [Test] @@ -154,82 +158,82 @@ public void TestLexErrorOnThirdLine() } } - [Test] - public void TestPerformanceWhenHandlingVeryLargeCharRanges() - { - // foreach (var runtime in Enum.GetValues(typeof(LexerRuntime))) - { - // Console.WriteLine(runtime.ToString()); - // var ticks = System.DateTime.Now.Ticks; - - var lexer = LexerFactory.Configure(configurator => - { - configurator.Runtime = LexerRuntime.Tabular; - configurator.Token("\\w[0-9]", null); - configurator.Token("\\d\\D\\W", null); - configurator.Token("abcdefghijklmnopqrstuvxyz", null); - configurator.Token("01234567890&%#", null); - }); - - // Console.WriteLine(System.DateTime.Now.Ticks - ticks); - } - } - - [Test] - public void TestLexLargeText() - { - const string text = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy " + - "nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad " + - "minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip " + - "ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit " + - "esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et " + - "accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue " + - "duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend " + - "option congue nihil imperdiet doming id quod mazim placerat facer possim assum. " + - "Typi non habent claritatem insitam; est usus legentis in iis qui facit eorum " + - "claritatem. Investigationes demonstraverunt lectores legere me lius quod ii " + - "legunt saepius. Claritas est etiam processus dynamicus, qui sequitur mutationem " + - "consuetudium lectorum. Mirum est notare quam littera gothica, quam nunc putamus parum claram, " + - "anteposuerit litterarum formas humanitatis per seacula quarta decima et quinta decima. 
" + - "Eodem modo typi, qui nunc nobis videntur parum clari, fiant sollemnes in futurum."; - - int numWords = 0; - int numPunctuation = 0; - var lexer = LexerFactory.Configure(c => - { - c.Token("\\w+", s => ++numWords); - c.Token("[.,]", s => ++numPunctuation); - c.Ignore("\\s+"); - }); - int numTokens = 0; - foreach (var token in lexer.Tokenize(text)) - { - numTokens++; - } - Assert.AreEqual(172, numWords); - Assert.AreEqual(18, numPunctuation); - Assert.AreEqual(190, numTokens); - - Console.WriteLine("asas"); - } - - [Test] + [Test] + public void TestPerformanceWhenHandlingVeryLargeCharRanges() + { + // foreach (var runtime in Enum.GetValues(typeof(LexerRuntime))) + { + // Console.WriteLine(runtime.ToString()); + // var ticks = System.DateTime.Now.Ticks; + + var lexer = LexerFactory.Configure(configurator => + { + configurator.Runtime = LexerRuntime.Tabular; + configurator.Token("\\w[0-9]", null); + configurator.Token("\\d\\D\\W", null); + configurator.Token("abcdefghijklmnopqrstuvxyz", null); + configurator.Token("01234567890&%#", null); + }); + + // Console.WriteLine(System.DateTime.Now.Ticks - ticks); + } + } + + [Test] + public void TestLexLargeText() + { + const string text = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy " + + "nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad " + + "minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip " + + "ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit " + + "esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et " + + "accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue " + + "duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend " + + "option congue nihil imperdiet doming id quod mazim placerat facer possim assum. 
" + + "Typi non habent claritatem insitam; est usus legentis in iis qui facit eorum " + + "claritatem. Investigationes demonstraverunt lectores legere me lius quod ii " + + "legunt saepius. Claritas est etiam processus dynamicus, qui sequitur mutationem " + + "consuetudium lectorum. Mirum est notare quam littera gothica, quam nunc putamus parum claram, " + + "anteposuerit litterarum formas humanitatis per seacula quarta decima et quinta decima. " + + "Eodem modo typi, qui nunc nobis videntur parum clari, fiant sollemnes in futurum."; + + int numWords = 0; + int numPunctuation = 0; + var lexer = LexerFactory.Configure(c => + { + c.Token("\\w+", s => ++numWords); + c.Token("[.,]", s => ++numPunctuation); + c.Ignore("\\s+"); + }); + int numTokens = 0; + foreach (var token in lexer.Tokenize(text)) + { + numTokens++; + } + Assert.AreEqual(172, numWords); + Assert.AreEqual(18, numPunctuation); + Assert.AreEqual(190, numTokens); + + Console.WriteLine("asas"); + } + + [Test] public void TestCreateDFA() { - NFA nfa = NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader("a|b*cd")))); + NFA nfa = NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader("a|b*cd")), false)); DFA dfa = DFA.Create(nfa); } [Test] public void TestCreateDFA2() { - DFA dfa = DFA.Create(NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader("a|b|c"))))); + DFA dfa = DFA.Create(NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader("a|b|c")), false))); } [Test] public void TestOneOrMoreDFA() { - NFA nfa = NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader("a+")))); + NFA nfa = NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader("a+")), false)); DFA dfa = DFA.Create(nfa); } } diff --git a/Piglet.Tests/Parser/ErrorRecoveryTests.cs b/Piglet.Tests/Parser/ErrorRecoveryTests.cs index 59f5a28..55e5d13 100644 --- a/Piglet.Tests/Parser/ErrorRecoveryTests.cs +++ b/Piglet.Tests/Parser/ErrorRecoveryTests.cs @@ -23,7 +23,7 @@ public void 
TestRecoverFromErrors() var terminatedA = configurator.CreateNonTerminal(); a.AddProduction(a, "a").SetReduceFunction(f => f[0] + 1); - a.AddProduction("a").SetReduceFunction(f => 1); + a.AddProduction("a").SetReduceFunction((int[] _) => 1); terminatedA.AddProduction(a, ";").SetReduceFunction( f => f[0]); terminatedA.AddProduction(configurator.ErrorToken, ";").SetErrorFunction((e, f) => diff --git a/Piglet.Tests/Parser/ParserConfigurationTest.cs b/Piglet.Tests/Parser/ParserConfigurationTest.cs index 4a07b83..f6ab0f3 100644 --- a/Piglet.Tests/Parser/ParserConfigurationTest.cs +++ b/Piglet.Tests/Parser/ParserConfigurationTest.cs @@ -37,7 +37,7 @@ public void TestACalculator() factor.AddProduction("(", expr, ")").SetReduceFunction(s => s[1]); var parser = configurator.CreateParser(); - int result = parser.Parse(new StringReader("2-2-5")); + int result = parser.Parse("2-2-5"); Assert.AreEqual(-5, result); } @@ -348,43 +348,43 @@ public void TestSingleRuleTerminalGrammar() parser.Parse("abcd"); } - [Test] - public void TestConstantStringsInRulesTakesPrecedenceOverDeclaredTerminals() - { - var configurator = ParserFactory.Configure(); - var ident = configurator.CreateTerminal("[a-z]+"); - var s = configurator.CreateNonTerminal(); - int x = 0; - s.AddProduction("abc", ident).SetReduceFunction(f => - { - x = 1; - return 0; - }); - var parser = configurator.CreateParser(); - - parser.Parse("abc abcde"); - - Assert.AreEqual(1, x); - } - - [Test] - public void TestRedeclaredTerminalDoesNotChangePrecedence() - { - var configurator = ParserFactory.Configure(); - var name = configurator.CreateTerminal("abc"); - var ident = configurator.CreateTerminal("[a-z]+"); - var s = configurator.CreateNonTerminal(); - int x = 0; - s.AddProduction("abc", ident).SetReduceFunction(f => - { - x = 1; - return 0; - }); - var parser = configurator.CreateParser(); - - parser.Parse("abc abcde"); - - Assert.AreEqual(1, x); - } + [Test] + public void 
TestConstantStringsInRulesTakesPrecedenceOverDeclaredTerminals() + { + var configurator = ParserFactory.Configure(); + var ident = configurator.CreateTerminal("[a-z]+"); + var s = configurator.CreateNonTerminal(); + int x = 0; + s.AddProduction("abc", ident).SetReduceFunction((int[] _) => + { + x = 1; + return 0; + }); + var parser = configurator.CreateParser(); + + parser.Parse("abc abcde"); + + Assert.AreEqual(1, x); + } + + [Test] + public void TestRedeclaredTerminalDoesNotChangePrecedence() + { + var configurator = ParserFactory.Configure(); + var name = configurator.CreateTerminal("abc"); + var ident = configurator.CreateTerminal("[a-z]+"); + var s = configurator.CreateNonTerminal(); + int x = 0; + s.AddProduction("abc", ident).SetReduceFunction((int[] _) => + { + x = 1; + return 0; + }); + var parser = configurator.CreateParser(); + + parser.Parse("abc abcde"); + + Assert.AreEqual(1, x); + } } } diff --git a/Piglet.Tests/Piglet.Tests.csproj b/Piglet.Tests/Piglet.Tests.csproj index 0e98eb7..ccc36a9 100644 --- a/Piglet.Tests/Piglet.Tests.csproj +++ b/Piglet.Tests/Piglet.Tests.csproj @@ -1,19 +1,14 @@ - - - netcoreapp2.1 - - false - - - - - - - - - - - - + + netcoreapp3.1 + false + + + + + + + + + \ No newline at end of file diff --git a/Piglet.sln b/Piglet.sln index 6b9148d..2e4a9c7 100644 --- a/Piglet.sln +++ b/Piglet.sln @@ -4,10 +4,28 @@ Microsoft Visual Studio Solution File, Format Version 12.00 VisualStudioVersion = 16.0.29613.14 MinimumVisualStudioVersion = 15.0.26124.0 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Piglet", "Piglet\Piglet.csproj", "{66291EB3-5AF3-4AEE-9976-E85C50EE25C3}" + ProjectSection(ProjectDependencies) = postProject + {0672D7AB-F4A2-4760-89CD-C433EB36E49C} = {0672D7AB-F4A2-4760-89CD-C433EB36E49C} + EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Piglet.Tests", "Piglet.Tests\Piglet.Tests.csproj", "{4B926BCC-9F15-48FE-A37A-A9AF9F235360}" EndProject 
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Piglet.Demo", "Demo\Piglet.Demo.csproj", "{B23B66AD-EA0F-4254-BDE6-C4EF883BD5B0}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Piglet.Demo", "Demo\Piglet.Demo.csproj", "{B23B66AD-EA0F-4254-BDE6-C4EF883BD5B0}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{41C436F4-E3FA-486E-8C4C-1B2828D00D50}" + ProjectSection(SolutionItems) = preProject + .gitignore = .gitignore + appveyor.yml = appveyor.yml + LICENSE.txt = LICENSE.txt + README.md = README.md + EndProjectSection +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Piglet.GenericGenerator", "Piglet.GenericGenerator\Piglet.GenericGenerator.csproj", "{0672D7AB-F4A2-4760-89CD-C433EB36E49C}" +EndProject +Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Piglet.FSharp", "Piglet.FSharp\Piglet.FSharp.fsproj", "{449CB8F6-06C8-4559-A9FE-35552983E82E}" + ProjectSection(ProjectDependencies) = postProject + {0672D7AB-F4A2-4760-89CD-C433EB36E49C} = {0672D7AB-F4A2-4760-89CD-C433EB36E49C} + EndProjectSection EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -55,6 +73,30 @@ Global {B23B66AD-EA0F-4254-BDE6-C4EF883BD5B0}.Release|x64.Build.0 = Release|Any CPU {B23B66AD-EA0F-4254-BDE6-C4EF883BD5B0}.Release|x86.ActiveCfg = Release|Any CPU {B23B66AD-EA0F-4254-BDE6-C4EF883BD5B0}.Release|x86.Build.0 = Release|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Debug|x64.ActiveCfg = Debug|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Debug|x64.Build.0 = Debug|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Debug|x86.ActiveCfg = Debug|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Debug|x86.Build.0 = Debug|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Release|Any CPU.ActiveCfg = Release|Any CPU + 
{0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Release|Any CPU.Build.0 = Release|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Release|x64.ActiveCfg = Release|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Release|x64.Build.0 = Release|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Release|x86.ActiveCfg = Release|Any CPU + {0672D7AB-F4A2-4760-89CD-C433EB36E49C}.Release|x86.Build.0 = Release|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Debug|x64.ActiveCfg = Debug|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Debug|x64.Build.0 = Debug|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Debug|x86.ActiveCfg = Debug|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Debug|x86.Build.0 = Debug|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Release|Any CPU.Build.0 = Release|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Release|x64.ActiveCfg = Release|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Release|x64.Build.0 = Release|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Release|x86.ActiveCfg = Release|Any CPU + {449CB8F6-06C8-4559-A9FE-35552983E82E}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/Piglet/Common/CompressedTable.cs b/Piglet/Common/CompressedTable.cs index 397d9d4..3e47284 100644 --- a/Piglet/Common/CompressedTable.cs +++ b/Piglet/Common/CompressedTable.cs @@ -1,103 +1,99 @@ -using System; using System.Collections.Generic; using System.Linq; namespace Piglet.Common { - internal class CompressedTable : ITable2D + internal sealed class CompressedTable + : ITable2D { - private readonly int[] displacement; - private readonly short[] data; + private readonly int[] _displacement; + private readonly short[] _data; + + 
public int this[int state, int input] => _data[_displacement[state] + input]; + public CompressedTable(short[,] uncompressed) { // Create a displacement table - var numStates = uncompressed.GetUpperBound(0) + 1; - displacement = new int[numStates]; + int numStates = uncompressed.GetUpperBound(0) + 1; + + _displacement = new int[numStates]; - var table = new List(); - var offsetHashes = new List(); + List table = new List(); + List offsetHashes = new List(); // Add the first range straight away. - table.AddRange(StateActions(0, uncompressed)); - displacement[0] = 0; + table.AddRange(StateActions(0, uncompressed)); + _displacement[0] = 0; // For each additional state, try to match as best as possible with the existing list for (int state = 1; state < numStates; ++state) { - var stateActions = StateActions(state, uncompressed).ToArray(); - var hash = stateActions.Aggregate(0, (acc, x) => (acc * 397) ^ x); + short[] stateActions = StateActions(state, uncompressed).ToArray(); + int hash = stateActions.Aggregate(0, (acc, x) => (acc * 397) ^ x); // Need to run *past* the table in order to add wholly incompatible matches // this will not index out of the table, so there is no need to worry. - var tableCount = table.Count(); + int tableCount = table.Count(); for (int displacementIndex = 0; displacementIndex <= tableCount; ++displacementIndex) { - if (displacementIndex < offsetHashes.Count && offsetHashes[displacementIndex] != hash) - { - continue; - } + if (displacementIndex < offsetHashes.Count && offsetHashes[displacementIndex] != hash) + continue; - bool spotFound = true; + bool spotFound = true; int offset = displacementIndex; - foreach (var stateAction in stateActions) + + foreach (short stateAction in stateActions) { if (offset >= tableCount) - { // Run out of table to check, but is still OK. 
break; - } - if (stateAction != table[offset]) + else if (stateAction != table[offset]) { // Not found spotFound = false; + break; } + ++offset; } // Exiting the loop, if a spot is found add the correct displacement index if (spotFound) { - displacement[state] = displacementIndex; + _displacement[state] = displacementIndex; // Add to the state table as much as is needed. table.AddRange(stateActions.Skip(offset - displacementIndex)); - // Add the hashes that does not exist up to the displacement index - for (int i = offsetHashes.Count; i < displacementIndex; ++i) - { - var offsetHash = 0; - for (int j = i; j < stateActions.Length; ++j ) - { - offsetHash = (offsetHash * 397) ^ table[j]; - } - offsetHashes.Add(offsetHash); - } - offsetHashes.Add(hash); - - // Break loop to process next state. - break; + // Add the hashes that does not exist up to the displacement index + for (int i = offsetHashes.Count; i < displacementIndex; ++i) + { + int offsetHash = 0; + + for (int j = i; j < stateActions.Length; ++j) + offsetHash = (offsetHash * 397) ^ table[j]; + + offsetHashes.Add(offsetHash); + } + + offsetHashes.Add(hash); + + // Break loop to process next state. 
+ break; } } } - data = table.ToArray(); + _data = table.ToArray(); } private IEnumerable StateActions(int state, short[,] uncompressed) { for (int i = 0; i <= uncompressed.GetUpperBound(1); ++i) - { yield return uncompressed[state, i]; - } - } - - public int this[int state, int input] - { - get { return data[displacement[state] + input]; } - set { throw new NotImplementedException(); } } } } \ No newline at end of file diff --git a/Piglet/Common/SublistSearch.cs b/Piglet/Common/SublistSearch.cs index 6c800dc..42fd5ee 100644 --- a/Piglet/Common/SublistSearch.cs +++ b/Piglet/Common/SublistSearch.cs @@ -1,43 +1,39 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -namespace Piglet.Common -{ - /// - /// Extension methods for finding the index of a sublist - /// - public static class SublistSearch - { - /// - /// Get the index of the a sublist - /// - /// Type of list - /// List to search - /// List to find - /// Index of start of sublist, or -1 if not found - public static int IndexOf(this IList haystack, IList needle) - { - // Stupid implementation. This could probably benefit from - // using a string search algorithm. - for (int i = 0; i < haystack.Count - needle.Count; ++i) - { - bool found = true; - for (int j = 0; j < needle.Count; ++j) - { - if (!haystack[i + j].Equals(needle[j])) - { - found = false; - break; - } - } - - if (found) - return i; - } - - return -1; - } - } -} +using System.Collections.Generic; + +namespace Piglet.Common +{ + /// + /// Extension methods for finding the index of a sublist + /// + public static class SublistSearch + { + /// + /// Get the index of the a sublist + /// + /// Type of list + /// List to search + /// List to find + /// Index of start of sublist, or -1 if not found + public static int IndexOf(this IList haystack, IList needle) + { + // Stupid implementation. This could probably benefit from using a string search algorithm. 
+ for (int i = 0; i < haystack.Count - needle.Count; ++i) + { + bool found = true; + + for (int j = 0; j < needle.Count; ++j) + if (!Equals(haystack[i + j], needle[j])) + { + found = false; + + break; + } + + if (found) + return i; + } + + return -1; + } + } +} \ No newline at end of file diff --git a/Piglet/Lexer/Configuration/ILexerConfigurator.cs b/Piglet/Lexer/Configuration/ILexerConfigurator.cs index 424769d..a697cc0 100644 --- a/Piglet/Lexer/Configuration/ILexerConfigurator.cs +++ b/Piglet/Lexer/Configuration/ILexerConfigurator.cs @@ -37,6 +37,11 @@ public interface ILexerConfigurator /// bool MinimizeDfa { get; set; } + /// + /// Specifies whether the lexer is case-insensitive (), or case-sensitive (). + /// + bool IgnoreCase { get; set; } + /// /// Gets and sets the lexer runtime, which is the method that the resulting lexer will be constructed with. /// diff --git a/Piglet/Lexer/Configuration/LexerConfigurator.cs b/Piglet/Lexer/Configuration/LexerConfigurator.cs index 3c5740a..9e04304 100644 --- a/Piglet/Lexer/Configuration/LexerConfigurator.cs +++ b/Piglet/Lexer/Configuration/LexerConfigurator.cs @@ -1,78 +1,75 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using Piglet.Lexer.Construction; -using Piglet.Lexer.Runtime; - -namespace Piglet.Lexer.Configuration -{ - internal class LexerConfigurator : ILexerConfigurator - { - private readonly List>> tokens; - private readonly List ignore; - - public LexerConfigurator() - { - tokens = new List>>(); - ignore = new List(); - EndOfInputTokenNumber = -1; - MinimizeDfa = true; - Runtime = LexerRuntime.Tabular; - } - - public ILexer CreateLexer() - { - // For each token, create a NFA - IList nfas = tokens.Select(token => NfaBuilder.Create(new ShuntingYard(new RegExLexer( new StringReader(token.Item1))))).ToList(); - foreach (var ignoreExpr in ignore) - { - nfas.Add(NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader(ignoreExpr))))); - } - - // Create a 
merged NFA - NFA mergedNfa = NFA.Merge(nfas); - - // If we desire a NFA based lexer, stop now - if (Runtime == LexerRuntime.Nfa) - { - return new NfaLexer(mergedNfa, nfas, tokens, EndOfInputTokenNumber); - } - - // Convert the NFA to a DFA - DFA dfa = DFA.Create(mergedNfa); - - // Minimize the DFA if required - dfa.Minimize(); - - // If we desire a DFA based lexer, stop - if (Runtime == LexerRuntime.Dfa) - { - // TODO: - // The input ranges which will have been previously split into the smallest distinct - // units will need to be recombined in order for this to work as fast as possible. - //dfa.CombineInputRanges(); - return new DfaLexer(dfa, nfas, tokens, EndOfInputTokenNumber); - } - - // Convert the dfa to table form - var transitionTable = new TransitionTable(dfa, nfas, tokens); - - return new TabularLexer(transitionTable, EndOfInputTokenNumber); - } - - public void Token(string regEx, Func action) - { - tokens.Add(new Tuple>(regEx, action)); - } - - public void Ignore(string regEx) - { - ignore.Add(regEx); - } - - public int EndOfInputTokenNumber { get; set; } - public bool MinimizeDfa { get; set; } - public LexerRuntime Runtime { get; set; } - } +using System.Collections.Generic; +using System.Linq; +using System.IO; +using System; + +using Piglet.Lexer.Construction; +using Piglet.Lexer.Runtime; + +namespace Piglet.Lexer.Configuration +{ + internal sealed class LexerConfigurator + : ILexerConfigurator + { + private readonly List<(string regex, Func action)> _tokens = new List<(string, Func)>(); + private readonly List _ignore = new List(); + + + public int EndOfInputTokenNumber { get; set; } = -1; + public bool MinimizeDfa { get; set; } = true; + public bool IgnoreCase { get; set; } = false; + public LexerRuntime Runtime { get; set; } = LexerRuntime.Tabular; + + + public ILexer CreateLexer() + { + // For each token, create a NFA + IList nfas = _tokens.Select(token => + { + try + { + return NfaBuilder.Create(new ShuntingYard(new RegexLexer(new 
StringReader(token.regex)), IgnoreCase)); + } + catch (Exception ex) + { + throw new LexerConstructionException($"Malformed regex '{token.regex}'.", ex); + } + }).ToList(); + + foreach (string ignoreExpr in _ignore) + nfas.Add(NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader(ignoreExpr)), IgnoreCase))); + + // Create a merged NFA + NFA mergedNfa = NFA.Merge(nfas); + + // If we desire a NFA based lexer, stop now + if (Runtime == LexerRuntime.Nfa) + return new NfaLexer(mergedNfa, nfas, _tokens, EndOfInputTokenNumber); + + // Convert the NFA to a DFA + DFA dfa = DFA.Create(mergedNfa); + + // Minimize the DFA if required + dfa.Minimize(); + + // If we desire a DFA based lexer, stop + if (Runtime == LexerRuntime.Dfa) + { + // TODO: + // The input ranges which will have been previously split into the smallest distinct + // units will need to be recombined in order for this to work as fast as possible. + //dfa.CombineInputRanges(); + return new DfaLexer(dfa, nfas, _tokens, EndOfInputTokenNumber); + } + + // Convert the dfa to table form + TransitionTable transitionTable = new TransitionTable(dfa, nfas, _tokens); + + return new TabularLexer(transitionTable, EndOfInputTokenNumber); + } + + public void Token(string regex, Func action) => _tokens.Add((regex, action)); + + public void Ignore(string regex) => _ignore.Add(regex); + } } \ No newline at end of file diff --git a/Piglet/Lexer/Construction/CharRange.cs b/Piglet/Lexer/Construction/CharRange.cs index 370b267..bba982a 100644 --- a/Piglet/Lexer/Construction/CharRange.cs +++ b/Piglet/Lexer/Construction/CharRange.cs @@ -2,60 +2,43 @@ namespace Piglet.Lexer.Construction { - internal class CharRange : IComparable + internal sealed class CharRange + : IComparable { public char From { get; set; } public char To { get; set; } - private static string ToGraphSafeString(char c) + + public int CompareTo(CharRange? other) { - return c >= 33 && c <= 0x7e - ? 
c.ToString() - : string.Format("0x{0:x2}", (int)c); - } + if (other is null) + return 1; - public int CompareTo(CharRange other) - { - int cmp = From - other.From; - return cmp == 0 ? To - other.To : cmp; - } + int cmp = From - other.From; - public override string ToString() - { - return From == To ? ToGraphSafeString(From) : string.Format("{0}-{1}", ToGraphSafeString(From), ToGraphSafeString(To)); + return cmp == 0 ? To - other.To : cmp; } - public bool Equals(CharRange other) + public override string ToString() => From == To ? ToGraphSafeString(From) : $"{ToGraphSafeString(From)}-{ToGraphSafeString(To)}"; + + public bool Equals(CharRange? other) { - if (ReferenceEquals(null, other)) return false; - if (ReferenceEquals(this, other)) return true; + if (other is null) + return false; + else if (ReferenceEquals(this, other)) + return true; + return other.From == From && other.To == To; } - public override bool Equals(object obj) - { - if (ReferenceEquals(null, obj)) return false; - if (ReferenceEquals(this, obj)) return true; - if (obj.GetType() != typeof (CharRange)) return false; - return Equals((CharRange) obj); - } + public override bool Equals(object? obj) => obj is CharRange other && Equals(other); - public override int GetHashCode() - { - unchecked - { - return (From.GetHashCode()*397) ^ To.GetHashCode(); - } - } + public override int GetHashCode() => unchecked((From.GetHashCode() * 397) ^ To.GetHashCode()); - public static bool operator ==(CharRange left, CharRange right) - { - return Equals(left, right); - } + private static string ToGraphSafeString(char c) => c >= 33 && c <= 0x7e ? 
c.ToString() : $"0x{(int)c:x2}"; - public static bool operator !=(CharRange left, CharRange right) - { - return !Equals(left, right); - } + public static bool operator ==(CharRange left, CharRange right) => Equals(left, right); + + public static bool operator !=(CharRange left, CharRange right) => !(left == right); } } \ No newline at end of file diff --git a/Piglet/Lexer/Construction/CharSet.cs b/Piglet/Lexer/Construction/CharSet.cs index d36b7d0..8c28466 100644 --- a/Piglet/Lexer/Construction/CharSet.cs +++ b/Piglet/Lexer/Construction/CharSet.cs @@ -1,182 +1,145 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Piglet.Lexer.Construction -{ - internal class CharSet - { - private IList ranges = new List(); - - public IEnumerable Ranges { get { return ranges; } } - - public CharSet() - { - } - - public CharSet(IEnumerable ranges) - { - this.ranges = ranges.ToList(); - } - - public CharSet(bool combine, params char[] ranges) - { - if (ranges.Length % 2 != 0) - throw new ArgumentException("Number of chars in ranges must be an even number"); - for (int i = 0; i < ranges.Length; i += 2) - { - AddRange(ranges[i], ranges[i+1], combine); - } - } - - public void Add(char c) - { - AddRange(c,c, true); - } - - public void AddRange(char from, char to, bool combine = true) - { - if (from > to) - { - char pivot = to; - to = from; - from = pivot; - } - - if (combine) - { - // See if there is an old range that contains the new from as the to - // in that case merge the ranges - var range = ranges.SingleOrDefault(f => f.To == from); - if (range != null) - { - range.To = to; - return; - } - - // To the same thing the other direction - range = ranges.SingleOrDefault(f => f.From == to); - if (range != null) - { - range.From = from; - return; - } - } - - // Ranges are not mergeable. 
Add the range straight up - ranges.Add(new CharRange { From = from, To = to }); - } - - public bool Any() - { - return ranges.Any(); - } - - public override string ToString() - { - if ( !Any()) return "ε"; - return string.Join(", ", ranges.Select(f => f.ToString()).ToArray()); - } - - public void UnionWith(CharSet charSet) - { - foreach (var charRange in charSet.ranges) - { - if (!ranges.Contains(charRange)) - { - // Sanity check -#if DEBUG - if (ranges.Any( f => f.From == charRange.From || f.To == charRange.To)) - throw new Exception("Do not want"); -#endif - ranges.Add(charRange); - } - } - } - - public CharSet Except(CharSet except) - { - var cs = new CharSet(); - - foreach (var range in ranges) - { - foreach (var clippedRange in ClipRange(range, except.ranges)) - { - cs.AddRange(clippedRange.From, clippedRange.To); - } - } - return cs; - } - - private IEnumerable ClipRange(CharRange range, IList excludedCharRanges) - { - char from = range.From; - char to = range.To; - - foreach (var excludedRange in excludedCharRanges) - { - // If the range is fully excluded by the excluded range, yield nothing - if (excludedRange.From <= from && excludedRange.To >= to) - { - yield break; - } - - // Check if the excluded range is wholly contained within the range - if (excludedRange.From > from && excludedRange.To < to ) - { - // Split this range and return - foreach (var charRange in ClipRange(new CharRange {From = @from, To = (char)(excludedRange.From - 1)}, excludedCharRanges)) - { - yield return charRange; - } - - // Second split - foreach (var charRange in ClipRange(new CharRange { From = (char)(excludedRange.To + 1), To = to }, excludedCharRanges)) - { - yield return charRange; - } - - yield break; - } - - // Trim the edges of the range - if (to >= excludedRange.From && to <= excludedRange.To) - { - to = (char)(excludedRange.From - 1); - } - - if (from >= excludedRange.From && from <= excludedRange.To) - { - from = (char)(excludedRange.To + 1); - } - } - - // If the range 
has been clipped away to nothing, then quit - if (to < from) - yield break; - - // Return the possibly clipped range - yield return new CharRange { From = from, To = to}; - } - - public CharSet Union(CharSet charRange) - { - var c = new CharSet(); - foreach (var range in ranges) - { - c.AddRange(range.From, range.To); - } - - foreach (var range in charRange.ranges) - { - c.AddRange(range.From, range.To); - } - return c; - } - - public bool ContainsChar(char input) - { - return ranges.Any(charRange => charRange.From <= input && charRange.To >= input); - } - } +using System.Collections.Generic; +using System.Linq; +using System; + +namespace Piglet.Lexer.Construction +{ + internal sealed class CharSet + { + private readonly IList _ranges = new List(); + + public IEnumerable Ranges => _ranges; + + + public CharSet() + { + } + + public CharSet(IEnumerable ranges) => _ranges = ranges.ToList(); + + public CharSet(bool combine, params char[] ranges) + { + if (ranges.Length % 2 != 0) + throw new ArgumentException("Number of chars in ranges must be an even number."); + + for (int i = 0; i < ranges.Length; i += 2) + AddRange(ranges[i], ranges[i + 1], combine); + } + + public void Add(char c) => AddRange(c, c, true); + + public void AddRange(char from, char to, bool combine = true) + { + if (from > to) + { + char pivot = to; + + to = from; + from = pivot; + } + + if (combine) + // See if there is an old range that contains the new from as the to in that case merge the ranges + if (_ranges.SingleOrDefault(f => f.To == from) is { } r1) + { + r1.To = to; + + return; + } + else if (_ranges.SingleOrDefault(f => f.From == to) is { } r2) // To the same thing the other direction + { + r2.From = from; + + return; + } + + // Ranges are not mergeable. Add the range straight up + _ranges.Add(new CharRange { From = from, To = to }); + } + + public bool Any() => _ranges.Any(); + + public override string ToString() => !Any() ? 
"ε" : string.Join(", ", _ranges.Select(f => f.ToString()).ToArray()); + + public void UnionWith(CharSet charSet) + { + foreach (CharRange charRange in charSet._ranges) + if (!_ranges.Contains(charRange)) + { + // Sanity check +//#if DEBUG +// if (ranges.Any(f => f.From == charRange.From || f.To == charRange.To)) +// throw new Exception("Do not want"); +//#endif + _ranges.Add(charRange); + } + } + + public CharSet Except(CharSet except) + { + CharSet cs = new CharSet(); + + foreach (CharRange range in _ranges) + foreach (CharRange clippedRange in ClipRange(range, except._ranges)) + cs.AddRange(clippedRange.From, clippedRange.To); + + return cs; + } + + private IEnumerable ClipRange(CharRange range, IList excludedCharRanges) + { + char from = range.From; + char to = range.To; + + foreach (CharRange excludedRange in excludedCharRanges) + { + // If the range is fully excluded by the excluded range, yield nothing + if (excludedRange.From <= from && excludedRange.To >= to) + yield break; + + // Check if the excluded range is wholly contained within the range + if (excludedRange.From > from && excludedRange.To < to) + { + // Split this range and return + foreach (CharRange charRange in ClipRange(new CharRange { From = @from, To = (char)(excludedRange.From - 1) }, excludedCharRanges)) + yield return charRange; + + // Second split + foreach (CharRange charRange in ClipRange(new CharRange { From = (char)(excludedRange.To + 1), To = to }, excludedCharRanges)) + yield return charRange; + + yield break; + } + + // Trim the edges of the range + if (to >= excludedRange.From && to <= excludedRange.To) + to = (char)(excludedRange.From - 1); + + if (from >= excludedRange.From && from <= excludedRange.To) + from = (char)(excludedRange.To + 1); + } + + // If the range has been clipped away to nothing, then quit + if (to < from) + yield break; + + // Return the possibly clipped range + yield return new CharRange { From = from, To = to }; + } + + public CharSet Union(CharSet charRange) 
+ { + CharSet c = new CharSet(); + + foreach (CharRange range in _ranges) + c.AddRange(range.From, range.To); + + foreach (CharRange range in charRange._ranges) + c.AddRange(range.From, range.To); + + return c; + } + + public bool ContainsChar(char input) => _ranges.Any(charRange => charRange.From <= input && charRange.To >= input); + } } \ No newline at end of file diff --git a/Piglet/Lexer/Construction/DFA.cs b/Piglet/Lexer/Construction/DFA.cs index 32d11e7..0a16366 100644 --- a/Piglet/Lexer/Construction/DFA.cs +++ b/Piglet/Lexer/Construction/DFA.cs @@ -1,42 +1,37 @@ -using System; using System.Collections.Generic; using System.Linq; +using System; namespace Piglet.Lexer.Construction { - internal class DFA : FiniteAutomata + internal sealed class DFA + : FiniteAutomata { - public class State : BaseState + public sealed class State + : BaseState { public ISet NfaStates { get; private set; } public bool Mark { get; set; } - public State(ISet nfaStates) - { - NfaStates = nfaStates; - } - public IEnumerable LegalMoves(Transition[] fromTransitions) - { - return fromTransitions.SelectMany(f => f.ValidInput.Ranges).Distinct(); - } + public State(ISet nfaStates) => NfaStates = nfaStates; - public override string ToString() - { - // Purely for debugging purposes - return string.Format( "{0} {{{1}}}", StateNumber, String.Join( ", ", NfaStates)); - } + public IEnumerable LegalMoves(Transition[] fromTransitions) => + fromTransitions.SelectMany(f => f.ValidInput.Ranges).Distinct(); + + // Purely for debugging purposes + public override string ToString() => $"{StateNumber} {{{string.Join(", ", NfaStates)}}}"; - public override bool AcceptState + public override bool AcceptState { - get { return NfaStates.Any(f=>f.AcceptState); } - set {} // Do nothing, cannot set + get => NfaStates.Any(f => f.AcceptState); + set { } // Do nothing, cannot set } } public static DFA Create(NFA nfa) { - var closures = nfa.GetAllClosures(); + IDictionary> closures = nfa.GetAllClosures(); // The 
valid input ranges that the NFA contains will need to be split up so that // the smallest possible units which NEVER overlaps will be contained in each of the @@ -44,77 +39,70 @@ public static DFA Create(NFA nfa) nfa.DistinguishValidInputs(); // Get the closure set of S0 - var dfa = new DFA(); - dfa.States.Add(new State(closures[nfa.StartState])); - + DFA dfa = new DFA(); + + if (nfa.StartState is null) + throw new ArgumentException("The NFA's start state must not be null", nameof(nfa)); + else + dfa.States.Add(new State(closures[nfa.StartState])); + while (true) { // Get an unmarked state in dfaStates - var t = dfa.States.FirstOrDefault(f => !f.Mark); - if (null == t) - { - // We're done! - break; - } + State t = dfa.States.FirstOrDefault(f => !f.Mark); + + if (t is null) + break; // We're done! t.Mark = true; // Get the move states by stimulating this DFA state with // all possible characters. - var fromTransitions = nfa.Transitions.Where(f => t.NfaStates.Contains(f.From)).ToArray(); - - var moveDestinations = new Dictionary>(); - foreach (var fromTransition in fromTransitions) - { - foreach (var range in fromTransition.ValidInput.Ranges) - { - List destList; - if (!moveDestinations.TryGetValue(range, out destList)) - { - destList = new List(); - moveDestinations.Add(range, destList); - } - - destList.Add(fromTransition.To); - } - } + Transition[] fromTransitions = nfa.Transitions.Where(f => t.NfaStates.Contains(f.From)).ToArray(); + Dictionary> moveDestinations = new Dictionary>(); + + foreach (Transition fromTransition in fromTransitions) + foreach (CharRange range in fromTransition.ValidInput.Ranges) + { + if (!moveDestinations.TryGetValue(range, out List? 
destList)) + { + destList = new List(); + moveDestinations.Add(range, destList); + } + + destList.Add(fromTransition.To); + } foreach (CharRange c in t.LegalMoves(fromTransitions)) { - var moveSet = moveDestinations[c]; + List moveSet = moveDestinations[c]; + if (moveSet.Any()) { // Get the closure of the move set. This is the NFA states that will form the new set ISet moveClosure = new HashSet(); - foreach (var moveState in moveSet) - { + foreach (NFA.State moveState in moveSet) moveClosure.UnionWith(closures[moveState]); - } - var newState = new State(moveClosure); + State newState = new State(moveClosure); // See if the new state already exists. If so change the reference to point to // the already created object, since we will need to add a transition back to the same object - var oldState = dfa.States.FirstOrDefault(f => f.NfaStates.SetEquals(newState.NfaStates));/* f.NfaStates.Count == newState.NfaStates.Count && - !f.NfaStates.Except(newState.NfaStates).Any() && + State oldState = dfa.States.FirstOrDefault(f => f.NfaStates.SetEquals(newState.NfaStates));/* f.NfaStates.Count == newState.NfaStates.Count && + !f.NfaStates.Except(newState.NfaStates).Any() && !newState.NfaStates.Except(f.NfaStates).Any());*/ - if (oldState == null) - { + if (oldState is null) dfa.States.Add(newState); - } else - { - // New state wasn't that new. We already have one exacly like it in the DFA. Set - // netstate to oldstate so that the created transition will be correct (still need to - // create a transition) + // New state wasn't that new. We already have one exacly like it in the DFA. Set netstate to + // oldstate so that the created transition will be correct (still need to create a transition) newState = oldState; - } - // See if there already is a transition. 
In that case, add our character to the list - // of valid values - var transition = dfa.Transitions.SingleOrDefault(f => f.From == t && f.To == newState); - if (transition == null) + // See if there already is a transition. In that case, add our character to the list of valid values + Transition transition = dfa.Transitions.SingleOrDefault(f => f.From == t && f.To == newState); + + if (transition is null) { // No transition has been found. Create a new one. transition = new Transition(t, newState); @@ -134,7 +122,7 @@ public static DFA Create(NFA nfa) public void Minimize() { - var distinct = new TriangularTable(States.Count, f => f.StateNumber ); + TriangularTable distinct = new TriangularTable(States.Count, f => f.StateNumber ); distinct.Fill(-1); // Fill with empty states // Create a function for the distinct state pairs and performing an action on them @@ -142,81 +130,74 @@ public void Minimize() { for (int i = 0; i < States.Count; ++i) { - var p = States[i]; + State p = States[i]; + for (int j = i + 1; j < States.Count; ++j) { - var q = States[j]; + State q = States[j]; + action(p, q); } - } + } }; // Get a set of all valid input ranges that we have in the DFA ISet allValidInputs = new HashSet(); - foreach (var transition in Transitions) - { + + foreach (Transition transition in Transitions) allValidInputs.UnionWith(transition.ValidInput.Ranges); - } - // For every distinct pair of states, if one of them is an accepting state - // and the other one is not set the distinct + // For every distinct pair of states, if one of them is an accepting state and the other one is not set the distinct distinctStatePairs((p, q) => { - var pIsAcceptState = p.AcceptState; - var bIsAcceptState = q.AcceptState; + bool pIsAcceptState = p.AcceptState; + bool bIsAcceptState = q.AcceptState; + if (bIsAcceptState && pIsAcceptState) { // If both are accepting states, then we might have an issue merging them. 
- // this is because we use multiple regular expressions with different endings when - // constructing lexers. - var pAcceptStates = p.NfaStates.Where(f => f.AcceptState).ToList(); - var qAcceptStates = q.NfaStates.Where(f => f.AcceptState).ToList(); + // This is because we use multiple regular expressions with different endings when constructing lexers. + List pAcceptStates = p.NfaStates.Where(f => f.AcceptState).ToList(); + List qAcceptStates = q.NfaStates.Where(f => f.AcceptState).ToList(); if (pAcceptStates.Count() == qAcceptStates.Count()) { - foreach (var pAcceptState in pAcceptStates) - { + foreach (NFA.State pAcceptState in pAcceptStates) if (!qAcceptStates.Contains(pAcceptState)) - { - // Since the accepting states differ, its not cool to merge - // these two states. - distinct[p, q] = int.MaxValue; - } - } + distinct[p, q] = int.MaxValue; // Since the accepting states differ, its not cool to merge these two states. } else - { - // Not the same number of states, not cool to merge - distinct[p, q] = int.MaxValue; - } + distinct[p, q] = int.MaxValue; // Not the same number of states, not cool to merge } if (pIsAcceptState ^ bIsAcceptState) - { distinct[p, q] = int.MaxValue; - } }); - // Make a dictionary of from transitions. This is well worth the time, since - // this gets accessed lots of times. - var targetDict = new Dictionary>(); - foreach (var transition in Transitions) - { - Dictionary toDict; - targetDict.TryGetValue(transition.From, out toDict); - if (toDict == null) - { - toDict = new Dictionary(); - targetDict.Add(transition.From, toDict); - } - foreach (var range in transition.ValidInput.Ranges) - { - toDict.Add(range, transition.To); - } - } + // Make a dictionary of from transitions. This is well worth the time, since this gets accessed lots of times. 
+ Dictionary> targetDict = new Dictionary>(); + + foreach (Transition transition in Transitions) + { + if (transition.From is null) + throw new InvalidOperationException("The outgoing state of a transition must not be null."); + + targetDict.TryGetValue(transition.From, out Dictionary? toDict); + + if (toDict is null) + { + toDict = new Dictionary(); + + targetDict.Add(transition.From, toDict); + } + + foreach (CharRange range in transition.ValidInput.Ranges) + toDict.Add(range, transition.To); + } // Start iterating bool changes; + do { changes = false; @@ -225,26 +206,15 @@ public void Minimize() { if (distinct[p, q] == -1) { - Func targetState = (state, c) => - { - Dictionary charDict; - if (targetDict.TryGetValue(state, out charDict)) - { - State toState; - if (charDict.TryGetValue(c, out toState)) - { - return toState; - } - } - return null; - }; - - foreach (var a in allValidInputs) + State? targetState(State state, CharRange c) => targetDict.TryGetValue(state, out Dictionary? charDict) && + charDict.TryGetValue(c, out State? toState) ? toState : null; + + foreach (CharRange a in allValidInputs) { - var qa = targetState(q, a); - var pa = targetState(p, a); + State? qa = targetState(q, a); + State? pa = targetState(p, a); - if (pa == null ^ qa == null) + if (pa is null ^ qa is null) { // If one of them has a transition on this character range but the other one doesn't then // they are separate. @@ -254,131 +224,112 @@ public void Minimize() break; } - // If both are null, then we carry on. - // The other one is null implictly since we have XOR checked it earlier - if (qa == null) continue; + // If both are null, then we carry on. 
The other one is null implictly since we have XOR checked it earlier + if (qa is null) + continue; if (distinct[qa, pa] != -1) { distinct[p, q] = a.GetHashCode(); changes = true; + break; } } } }); - } while (changes); + } + while (changes); // Merge states that still have blank square // To make this work we have to bunch states together since the indices will be screwed up - var mergeSets = new List>(); - Func> findMergeList = s => mergeSets.FirstOrDefault(m => m.Contains(s)); + List> mergeSets = new List>(); + Func>? findMergeList = s => mergeSets.FirstOrDefault(m => m.Contains(s)); distinctStatePairs((p, q) => { // No need to check those that we have already determined to be distinct - if (distinct[p, q] != -1) return; - - // These two states are supposed to merge! - // See if p or q is already part of a merge list! - var pMergeSet = findMergeList(p); - var qMergeSet = findMergeList(q); - - if (pMergeSet == null && qMergeSet == null) - { - // No previous set for either - // Add a new merge set - mergeSets.Add(new HashSet { p, q }); - } - else if (pMergeSet != null && qMergeSet == null) - { - // Add q to pMergeSet - pMergeSet.Add(q); - } - else if (pMergeSet == null) - { - // Add p to qMergeSet - qMergeSet.Add(p); - } - else + if (distinct[p, q] != -1) + return; + + // These two states are supposed to merge! See if p or q is already part of a merge list! + ISet? pMergeSet = findMergeList(p); + ISet? qMergeSet = findMergeList(q); + + if (pMergeSet is null && qMergeSet is null) + mergeSets.Add(new HashSet { p, q }); // No previous set for either. 
Add a new merge set + else if (pMergeSet != null && qMergeSet is null) + pMergeSet.Add(q); // Add q to pMergeSet + else if (pMergeSet is null) + qMergeSet.Add(p); // Add p to qMergeSet + else if (pMergeSet != qMergeSet) { // Both previously have merge sets // If its not the same set (which it shoudln't be) then add their union - if (pMergeSet != qMergeSet) - { - // Union everything into the pMergeSet - pMergeSet.UnionWith(qMergeSet); + + // Union everything into the pMergeSet + pMergeSet.UnionWith(qMergeSet); - // Remove the qMergeSet - mergeSets.Remove(qMergeSet); - } + // Remove the qMergeSet + mergeSets.Remove(qMergeSet); } }); // Armed with the merge sets, we can now do the actual merge - foreach (var mergeSet in mergeSets) + foreach (ISet mergeSet in mergeSets) { // The lone state that should remain is the FIRST set in the mergeset - var stateList = mergeSet.ToList(); - var outputState = stateList[0]; + List stateList = mergeSet.ToList(); + State outputState = stateList[0]; - // If this statelist contains the startstate, the new startstate will have to be - // the new output state + // If this statelist contains the startstate, the new startstate will have to be the new output state if (stateList.Contains(StartState)) - { StartState = outputState; - } - // Iterate over all the states in the merge list except for the one we have decided - // to merge everything into. + // Iterate over all the states in the merge list except for the one we have decided to merge everything into. 
for (int i = 1; i < stateList.Count; ++i) { - var toRemove = stateList[i]; - + State toRemove = stateList[i]; // Find all transitions that went to this state - var toTransitions = Transitions.Where(f => f.To == toRemove).ToList(); - foreach (var transition in toTransitions) + List> toTransitions = Transitions.Where(f => f.To == toRemove).ToList(); + + foreach (Transition transition in toTransitions) { // There can be two cases here, either there already is a new transition to be found, in // which case we can merge the valid input instead. The alternative is that there is no prior // transition, in which case we repoint our transition to the output state. - var existingTransition = Transitions.FirstOrDefault(f => f.From == transition.From && f.To == outputState); + Transition existingTransition = Transitions.FirstOrDefault(f => f.From == transition.From && f.To == outputState); + if (existingTransition != null) { existingTransition.ValidInput.UnionWith(transition.ValidInput); Transitions.Remove(transition); // Remove the old transition } else - { transition.To = outputState; - } } // Find all transitions that went from this state - var fromTransitions = Transitions.Where(f => f.From == toRemove).ToList(); - foreach (var transition in fromTransitions) + List> fromTransitions = Transitions.Where(f => f.From == toRemove).ToList(); + + foreach (Transition transition in fromTransitions) { // Same two cases as the code above - var existingTransition = Transitions.FirstOrDefault(f => f.From == outputState && f.To == transition.To); + Transition existingTransition = Transitions.FirstOrDefault(f => f.From == outputState && f.To == transition.To); + if (existingTransition != null) { existingTransition.ValidInput.UnionWith(transition.ValidInput); Transitions.Remove(transition); // Remove the old transition } else - { transition.From = outputState; - } } // Since before removing this state, we need to merge the list of NFA states that created both of these states - foreach 
(var nfaState in toRemove.NfaStates) - { + foreach (NFA.State nfaState in toRemove.NfaStates) if (!outputState.NfaStates.Contains(nfaState)) - { outputState.NfaStates.Add(nfaState); - } - } // There should be no more references to this state. It can thus be removed. States.Remove(toRemove); @@ -389,9 +340,6 @@ public void Minimize() AssignStateNumbers(); } - public override IEnumerable Closure(State[] states, ISet visitedStates = null) - { - return states; - } + public override IEnumerable Closure(State[] states, ISet? visitedStates = null) => states; } -} \ No newline at end of file +} diff --git a/Piglet/Lexer/Construction/DotNotation/DotNotation.cs b/Piglet/Lexer/Construction/DotNotation/DotNotation.cs index 3f6cce1..22b25df 100644 --- a/Piglet/Lexer/Construction/DotNotation/DotNotation.cs +++ b/Piglet/Lexer/Construction/DotNotation/DotNotation.cs @@ -15,12 +15,10 @@ public static class DotNotation /// /// Regular expression /// Minimize the resulting DFA + /// Determines whether the regular expression is case-insensitive /// Dot notation NFA graph /// Dot notation DFA graph - public static void GetDfaAndNfaGraphs(string regex, bool minimize, out string nfaString, out string dfaString) - { - GetDfaAndNfaGraphs(regex, null, minimize, out nfaString, out dfaString); - } + public static void GetDfaAndNfaGraphs(string regex, bool minimize, bool ignoreCase, out string nfaString, out string dfaString) => GetDfaAndNfaGraphs(regex, null, minimize, ignoreCase, out nfaString, out dfaString); /// /// Get the DFA and NFA graphs for a given regular expression and highlight active @@ -29,17 +27,18 @@ public static void GetDfaAndNfaGraphs(string regex, bool minimize, out string nf /// Regular expression /// Input string /// Minimize the resulting DFA + /// Determines whether the regular expression is case-insensitive /// Dot notation NFA graph /// Dot notation DFA graph - public static void GetDfaAndNfaGraphs(string regex, string input, bool minimize, out string nfaString, out 
string dfaString) + public static void GetDfaAndNfaGraphs(string regex, string? input, bool minimize, bool ignoreCase, out string nfaString, out string dfaString) { - var nfa = NfaBuilder.Create(new ShuntingYard(new RegExLexer(new StringReader(regex)))); + NFA nfa = NfaBuilder.Create(new ShuntingYard(new RegexLexer(new StringReader(regex)), ignoreCase)); nfaString = nfa.AsDotNotation(input, "NFA"); - var dfa = DFA.Create(nfa); + DFA dfa = DFA.Create(nfa); + if (minimize) - { dfa.Minimize(); - } + dfaString = dfa.AsDotNotation(input, "DFA"); } @@ -51,7 +50,8 @@ public static void GetDfaAndNfaGraphs(string regex, string input, bool minimize, /// Input to highlight the current state with /// Graph name as specified in notation /// - internal static string AsDotNotation(this FiniteAutomata automata, string input, string graphName = "automata") where TState : FiniteAutomata.BaseState + internal static string AsDotNotation(this FiniteAutomata automata, string? input, string graphName = "automata") + where TState : FiniteAutomata.BaseState { // Draw the *FA as a directed graph with the state numbers in circles // Use a double circle for accepting states @@ -63,62 +63,50 @@ internal static string AsDotNotation(this FiniteAutomata automat // 1 -> 2 [label=a] //} - var sb = new StringBuilder(); + StringBuilder sb = new StringBuilder(); sb.Append("digraph " + graphName + " {\n"); sb.Append("\t[node shape=\"circle\"]\n"); sb.Append("\tgraph [rankdir=\"LR\"]\n"); - IEnumerable currentStates = Enumerable.Empty(); + IEnumerable? 
currentStates = Enumerable.Empty(); bool matchSuccessful = false; if (!string.IsNullOrEmpty(input)) { - var stimulateResult = automata.Stimulate(input); + StimulateResult stimulateResult = automata.Stimulate(input); - matchSuccessful = (input == stimulateResult.Matched); + matchSuccessful = input == stimulateResult.Matched; - sb.AppendFormat("\tlabel=\"Matched: {0}\"\n", stimulateResult.Matched.Replace("\"", "\\\"")); + sb.AppendFormat("\tlabel=\"Matched: {0}\"\n", stimulateResult.Matched?.Replace("\"", "\\\"")); sb.Append("\tlabelloc=top;\n"); sb.Append("\tlabeljust=center;\n"); currentStates = stimulateResult.ActiveStates; } - foreach (var transition in automata.Transitions) - { - sb.Append(string.Format("\t{0} -> {1} [label=\"{2}\"]\n", - transition.From.StateNumber, - transition.To.StateNumber, - transition.TransitionLabel().Replace("\\", "\\\\").Replace("\"", "\\\""))); - } + foreach (Transition transition in automata.Transitions) + sb.Append($"\t{transition.From?.StateNumber} -> {transition.To?.StateNumber} [label=\"{transition.TransitionLabel().Replace("\\", "\\\\").Replace("\"", "\\\"")}\"]\n"); - foreach (var state in automata.States.Where(f => f.AcceptState || currentStates.Contains(f))) - { + foreach (TState state in automata.States.Where(f => f.AcceptState || (currentStates?.Contains(f) ?? false))) sb.AppendFormat("\t{0} [{1}{2}]\n", state.StateNumber, state.AcceptState ? "shape=\"doublecircle\"" : "", - currentStates.Contains(state) ? - string.Format(" fillcolor=\"{0}\" style=\"filled\"", matchSuccessful ? "green" : "red") - : ""); - } + (currentStates?.Contains(state) ?? false) ? + $" fillcolor=\"{(matchSuccessful ? 
"green" : "red")}\" style=\"filled\"" : ""); sb.Append("}"); return sb.ToString(); } - /// /// DOT language label name for transitions /// /// /// /// - internal static string TransitionLabel(this Transition transition ) - { - return transition.ValidInput.ToString(); - } + internal static string TransitionLabel(this Transition transition) => transition.ValidInput.ToString(); } } diff --git a/Piglet/Lexer/Construction/FiniteAutomata.cs b/Piglet/Lexer/Construction/FiniteAutomata.cs index b110897..4f071cd 100644 --- a/Piglet/Lexer/Construction/FiniteAutomata.cs +++ b/Piglet/Lexer/Construction/FiniteAutomata.cs @@ -1,164 +1,163 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -namespace Piglet.Lexer.Construction -{ - internal abstract class FiniteAutomata where TState : FiniteAutomata.BaseState - { - public abstract class BaseState - { - public abstract bool AcceptState { get; set; } - public int StateNumber { get; set; } - } - - public IList States { get; set; } - public IList> Transitions { get; set; } - public TState StartState { get; set; } - - - protected FiniteAutomata() - { - States = new List(); - Transitions = new List>(); - } - - public abstract IEnumerable Closure(TState[] states, ISet visitedStates = null); - - public void AssignStateNumbers() - { - int i = 0; - foreach (var state in States) - { - if (state != StartState) - state.StateNumber = ++i; - } - // Always use 0 as the start state - StartState.StateNumber = 0; - } - - public void DistinguishValidInputs() - { - var ranges = new List(Transitions.SelectMany(f => f.ValidInput.Ranges)); - var beginningsAndEnds = ranges.Select(f => f.From).Concat(ranges.Select(f => f.To == char.MaxValue ? 
f.To : (char)(f.To+1))).ToArray(); - Array.Sort(beginningsAndEnds); - int pivot = 0; - for (int i = 1; i < beginningsAndEnds.Length; ++i) - { - if (beginningsAndEnds[i] != beginningsAndEnds[pivot]) - { - beginningsAndEnds[++pivot] = beginningsAndEnds[i]; - } - } - ++pivot; - - var distinguishedRanges = new List(pivot * 2); - - for(int i = 1; i < pivot; ++i) - { - distinguishedRanges.Add(new CharRange {From = beginningsAndEnds[i-1], To = beginningsAndEnds[i] }); - } - - foreach (var transition in Transitions) - { - transition.ValidInput = new CharSet(transition.ValidInput.Ranges.SelectMany(range => FindNewRanges(range, distinguishedRanges))); - } - } - - private IEnumerable FindNewRanges(CharRange range, List distinguishedRanges) - { - int a = 0; - int b = distinguishedRanges.Count; - - int startIndex = 0; - while (true) - { - int pivot = a + (b - a) / 2; - - int cmp = range.From - distinguishedRanges[pivot].From; - - if (cmp == 0) - { - startIndex = pivot; - break; - } - - if (cmp < 0) - { - b = pivot; - } - else - { - a = pivot; - } - } - - int a2 = startIndex; - int b2 = distinguishedRanges.Count; - char c = range.To == char.MaxValue ? range.To : (char) (range.To + 1); - while (true) - { - int pivot = a2 + (b2 - a2) / 2; - - int cmp = c - distinguishedRanges[pivot].To; - - if (cmp == 0) - { - for (int i = startIndex; i <= pivot; ++i) - { - CharRange f = distinguishedRanges[i]; - yield return new CharRange { From = f.From, To = f.To == char.MaxValue ? 
f.To : (char) (f.To - 1) }; - } - yield break; - } - - if (cmp < 0) - { - b2 = pivot; - } - else - { - a2 = pivot; - } - } - } - - public StimulateResult Stimulate(string input) - { - var activeStates = Closure(new[] {StartState}).ToList(); - var matchedString = new StringBuilder(); - foreach (var c in input) - { - var toStates = new HashSet(); - foreach (var activeState in activeStates) - { - var nextStates = Transitions.Where(t => t.From == activeState && t.ValidInput.ContainsChar(c)).Select(t=>t.To); - toStates.UnionWith(nextStates); - } - - if (toStates.Any()) - { - matchedString.Append(c); - activeStates = Closure(toStates.ToArray()).ToList(); - } - else - { - break; - } - } - - return new StimulateResult - { - Matched = matchedString.ToString(), - ActiveStates = activeStates - }; - } - } - - internal class StimulateResult where TState : FiniteAutomata.BaseState - { - public string Matched { get; set; } - public IEnumerable ActiveStates { get; set; } - } -} +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Piglet.Lexer.Construction +{ + internal abstract class FiniteAutomata + where TState : FiniteAutomata.BaseState + { + public IList States { get; set; } + public IList> Transitions { get; set; } + public TState? StartState { get; set; } + + + protected FiniteAutomata() + { + States = new List(); + Transitions = new List>(); + } + + public abstract IEnumerable Closure(TState[] states, ISet? visitedStates = null); + + public void AssignStateNumbers() + { + int i = 0; + + foreach (TState state in States) + if (state != StartState) + state.StateNumber = ++i; + + // Always use 0 as the start state + StartState.StateNumber = 0; + } + + public void DistinguishValidInputs() + { + List ranges = new List(Transitions.SelectMany(f => f.ValidInput.Ranges)); + char[] beginningsAndEnds = ranges.Select(f => f.From).Concat(ranges.Select(f => f.To == char.MaxValue ? 
f.To : (char)(f.To + 1))).ToArray(); + int pivot = 0; + + Array.Sort(beginningsAndEnds); + + for (int i = 1; i < beginningsAndEnds.Length; ++i) + if (beginningsAndEnds[i] != beginningsAndEnds[pivot]) + beginningsAndEnds[++pivot] = beginningsAndEnds[i]; + + ++pivot; + + List distinguishedRanges = new List(pivot * 2); + + for(int i = 1; i < pivot; ++i) + distinguishedRanges.Add(new CharRange + { + From = beginningsAndEnds[i - 1], + To = beginningsAndEnds[i] + }); + + foreach (Transition transition in Transitions) + transition.ValidInput = new CharSet(transition.ValidInput.Ranges.SelectMany(range => FindNewRanges(range, distinguishedRanges))); + } + + private IEnumerable FindNewRanges(CharRange range, List distinguishedRanges) + { + int a = 0; + int b = distinguishedRanges.Count; + int startIndex; + + while (true) + { + int pivot = a + (b - a) / 2; + int cmp = range.From - distinguishedRanges[pivot].From; + + if (cmp == 0) + { + startIndex = pivot; + + break; + } + + if (cmp < 0) + b = pivot; + else + a = pivot; + } + + int a2 = startIndex; + int b2 = distinguishedRanges.Count; + char c = range.To == char.MaxValue ? range.To : (char)(range.To + 1); + + while (true) + { + int pivot = a2 + (b2 - a2) / 2; + int cmp = c - distinguishedRanges[pivot].To; + + if (cmp == 0) + { + for (int i = startIndex; i <= pivot; ++i) + { + CharRange f = distinguishedRanges[i]; + + yield return new CharRange + { + From = f.From, + To = f.To == char.MaxValue ? f.To : (char)(f.To - 1) + }; + } + + yield break; + } + + if (cmp < 0) + b2 = pivot; + else + a2 = pivot; + } + } + + public StimulateResult Stimulate(string input) + { + List activeStates = Closure(new[] { StartState! 
}).ToList(); + StringBuilder matchedString = new StringBuilder(); + + foreach (char c in input) + { + HashSet toStates = new HashSet(); + + foreach (TState activeState in activeStates) + toStates.UnionWith(from t in Transitions + where t.From == activeState + where t.ValidInput.ContainsChar(c) + select t.To); + + if (toStates.Any()) + { + matchedString.Append(c); + activeStates = Closure(toStates.ToArray()).ToList(); + } + else + break; + } + + return new StimulateResult + { + Matched = matchedString.ToString(), + ActiveStates = activeStates + }; + } + + public abstract class BaseState + { + public abstract bool AcceptState { get; set; } + public int StateNumber { get; set; } + } + } + + internal sealed class StimulateResult + where TState : FiniteAutomata.BaseState + { + public string? Matched { get; set; } + public IEnumerable? ActiveStates { get; set; } + } +} diff --git a/Piglet/Lexer/Construction/LexerConstructionException.cs b/Piglet/Lexer/Construction/LexerConstructionException.cs index 035294a..e6251ed 100644 --- a/Piglet/Lexer/Construction/LexerConstructionException.cs +++ b/Piglet/Lexer/Construction/LexerConstructionException.cs @@ -5,7 +5,8 @@ namespace Piglet.Lexer.Construction /// /// Class of exceptions that may occur when creating a Lexer. 
/// - public class LexerConstructionException : Exception + public sealed class LexerConstructionException + : Exception { /// /// Construct a new LexerConstructionException @@ -15,5 +16,15 @@ public LexerConstructionException(string message) : base(message) { } + + /// + /// Construct a new LexerConstructionException + /// + /// Message to show + /// Inner exception + public LexerConstructionException(string message, Exception innerException) + : base(message, innerException) + { + } } } \ No newline at end of file diff --git a/Piglet/Lexer/Construction/NFA.cs b/Piglet/Lexer/Construction/NFA.cs index fce5354..4a0c030 100644 --- a/Piglet/Lexer/Construction/NFA.cs +++ b/Piglet/Lexer/Construction/NFA.cs @@ -1,117 +1,118 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Linq; namespace Piglet.Lexer.Construction { - internal class NFA : FiniteAutomata + internal sealed class NFA + : FiniteAutomata { - internal class State : BaseState + internal sealed class State + : BaseState { public override bool AcceptState { get; set; } - public override string ToString() - { - return string.Format("{0} {1}", StateNumber, AcceptState ? "ACCEPT" : ""); - } + public override string ToString() => string.Format("{0} {1}", StateNumber, AcceptState ? 
"ACCEPT" : ""); } + protected internal void AddAll(NFA nfa) { - foreach (var state in nfa.States) - { + foreach (State state in nfa.States) States.Add(state); - } - foreach (var edge in nfa.Transitions) - { + + foreach (Transition edge in nfa.Transitions) Transitions.Add(edge); - } } protected internal NFA Copy() { - var newNFA = new NFA(); - var stateMap = new Dictionary(); + NFA newNFA = new NFA(); + Dictionary stateMap = new Dictionary(); - foreach (var state in States) + foreach (State state in States) { - var newState = new State { AcceptState = state.AcceptState, StateNumber = state.StateNumber }; + State newState = new State { AcceptState = state.AcceptState, StateNumber = state.StateNumber }; + stateMap.Add(state, newState); newNFA.States.Add(newState); } - foreach (var transition in Transitions) - { - // Hard copy the valid input - var newTransition = new Transition(stateMap[transition.From], stateMap[transition.To], - transition.ValidInput); - newNFA.Transitions.Add(newTransition); - } + foreach (Transition transition in Transitions) + if (transition.To is { } to && transition.From is { } from) + { + // Hard copy the valid input + Transition newTransition = new Transition(stateMap[from], stateMap[to], transition.ValidInput); + + newNFA.Transitions.Add(newTransition); + } + else + throw new InvalidOperationException("At least one transition contains a null value in its source or target state."); + + if (StartState is null) + throw new InvalidOperationException("The start state must not be null."); newNFA.StartState = stateMap[StartState]; return newNFA; } - public override IEnumerable Closure(State[] states, ISet visitedStates = null) + public override IEnumerable Closure(State[] states, ISet? 
visitedStates = null) { - if (visitedStates == null) - { - visitedStates = new HashSet(); - } + visitedStates ??= new HashSet(); - foreach (var state in states) - { + foreach (State state in states) visitedStates.Add(state); - } // Find all states reachable by following only epsilon edges. - State[] closureStates = - (from e in Transitions.Where(f => states.Contains(f.From) && !f.ValidInput.Any() && !visitedStates.Contains(f.To)) select e.To).ToArray(); + State[] closureStates = (from e in Transitions + let dest = e.To + where dest is { } + where states.Contains(e.From) + where !e.ValidInput.Any() + where !visitedStates.Contains(dest) + select dest).ToArray(); if (closureStates.Length > 0) - { - foreach (var state1 in Closure(closureStates, visitedStates)) - { - yield return state1; - } - } + foreach (State state in Closure(closureStates, visitedStates)) + yield return state; - foreach (var state in states) - { + foreach (State state in states) yield return state; - } } - public static NFA Merge(IList nfas) + public static NFA Merge(IEnumerable nfas) { // Create a new NFA, add everything to it. 
- var merged = new NFA(); - foreach (var nfa in nfas) - { + NFA merged = new NFA(); + + foreach (NFA nfa in nfas) merged.AddAll(nfa); - } // Add a new start state - var state = new State(); + State state = new State(); + merged.States.Add(state); merged.StartState = state; // Add epsilon transiontions from the start state to all the previous start states - foreach (var nfa in nfas) - { - merged.Transitions.Add(new Transition(state, nfa.StartState)); - } + foreach (NFA nfa in nfas) + if (nfa.StartState is null) + throw new ArgumentException("At least one of the given NFAs contains a start state which is null.", nameof(nfas)); + else + merged.Transitions.Add(new Transition(state, nfa.StartState)); return merged; } public IDictionary> GetAllClosures() { - var output = new Dictionary>(); + Dictionary> output = new Dictionary>(); - foreach (var state in States) + foreach (State state in States) { ISet set = new HashSet(); + set.UnionWith(Closure(new[] {state})); output.Add(state, set); } diff --git a/Piglet/Lexer/Construction/NfaBuilder.cs b/Piglet/Lexer/Construction/NfaBuilder.cs index 5e31d5e..c792ab7 100644 --- a/Piglet/Lexer/Construction/NfaBuilder.cs +++ b/Piglet/Lexer/Construction/NfaBuilder.cs @@ -1,68 +1,79 @@ +using System; using System.Collections.Generic; using System.Linq; namespace Piglet.Lexer.Construction { - internal class NfaBuilder + internal static class NfaBuilder { public static NFA Create(ShuntingYard yard) { - var stack = new Stack(); + Stack stack = new Stack(); - foreach (var token in yard.ShuntedTokens()) + foreach (RegexToken token in yard.ShuntedTokens()) { switch (token.Type) { - case RegExToken.TokenType.OperatorMul: + case RegexTokenType.OperatorMul: stack.Push(RepeatZeroOrMore(stack.Pop())); + break; - case RegExToken.TokenType.OperatorQuestion: + case RegexTokenType.OperatorQuestion: stack.Push(RepeatZeroOrOnce(stack.Pop())); + break; - case RegExToken.TokenType.OperatorOr: + case RegexTokenType.OperatorOr: stack.Push(Or(stack.Pop(), 
stack.Pop())); + break; - case RegExToken.TokenType.OperatorPlus: + case RegexTokenType.OperatorPlus: stack.Push(RepeatOnceOrMore(stack.Pop())); + break; - case RegExToken.TokenType.Accept: + case RegexTokenType.Accept: stack.Push(Accept(token.Characters)); + break; - case RegExToken.TokenType.OperatorConcat: + case RegexTokenType.OperatorConcat: // & is not commutative, and the stack is reversed. - var second = stack.Pop(); - var first = stack.Pop(); + NFA second = stack.Pop(); + NFA first = stack.Pop(); + stack.Push(And(first, second)); + break; - case RegExToken.TokenType.NumberedRepeat: + case RegexTokenType.NumberedRepeat: stack.Push(NumberedRepeat(stack.Pop(), token.MinRepetitions, token.MaxRepetitions)); + break; default: throw new LexerConstructionException("Unknown operator!"); } } - // We should end up with only ONE NFA on the stack or the expression - // is malformed. + // We should end up with only ONE NFA on the stack or the expression is malformed. if (stack.Count() != 1) - { - throw new LexerConstructionException("Malformed regexp expression"); - } + throw new LexerConstructionException("Malformed regex expression."); // Pop it from the stack, and assign each state a number, primarily for debugging purposes, // they dont _really_ need it. The state numbers actually used are the one used in the DFA. 
- var nfa = stack.Pop(); + NFA nfa = stack.Pop(); + nfa.AssignStateNumbers(); + return nfa; } + private static NFA RepeatOnceOrMore(NFA nfa) { // Add an epsilon transition from the accept state back to the start state NFA.State oldAcceptState = nfa.States.First(f => f.AcceptState); - nfa.Transitions.Add(new Transition(oldAcceptState, nfa.StartState)); + + nfa.Transitions.Add(new Transition(oldAcceptState, nfa.StartState!)); // Add a new accept state, since we cannot have edges exiting the accept state - var newAcceptState = new NFA.State { AcceptState = true }; + NFA.State newAcceptState = new NFA.State { AcceptState = true }; + nfa.Transitions.Add(new Transition(oldAcceptState, newAcceptState)); nfa.States.Add(newAcceptState); @@ -75,15 +86,15 @@ private static NFA RepeatOnceOrMore(NFA nfa) private static NFA Accept(CharSet acceptCharacters) { // Generate a NFA with a simple path with one state transitioning into an accept state. - var nfa = new NFA(); - var state = new NFA.State(); + NFA nfa = new NFA(); + NFA.State state = new NFA.State(); + nfa.States.Add(state); - var acceptState = new NFA.State { AcceptState = true }; - nfa.States.Add(acceptState); + NFA.State acceptState = new NFA.State { AcceptState = true }; + nfa.States.Add(acceptState); nfa.Transitions.Add(new Transition(state, acceptState, acceptCharacters)); - nfa.StartState = state; return nfa; @@ -92,14 +103,11 @@ private static NFA Accept(CharSet acceptCharacters) public static NFA And(NFA first, NFA second) { // Create a new NFA and use the first NFAs start state as the starting point - var nfa = new NFA { StartState = first.StartState }; + NFA nfa = new NFA { StartState = first.StartState }; - // Change all links in to first acceptstate to go to seconds - // start state - foreach (var edge in first.Transitions.Where(f => f.To.AcceptState)) - { + // Change all links in to first acceptstate to go to seconds start state + foreach (Transition edge in first.Transitions.Where(f => f.To?.AcceptState ?? 
false)) edge.To = second.StartState; - } // Remove acceptstate from first first.States.Remove(first.States.First(f => f.AcceptState)); @@ -114,28 +122,28 @@ public static NFA And(NFA first, NFA second) public static NFA Or(NFA a, NFA b) { - var nfa = new NFA(); + NFA nfa = new NFA(); // Composite NFA contains all the and all edges in both NFAs nfa.AddAll(a); nfa.AddAll(b); - // Add a start state, link to both NFAs old start state with - // epsilon links + // Add a start state, link to both NFAs old start state with epsilon links nfa.StartState = new NFA.State(); nfa.States.Add(nfa.StartState); - nfa.Transitions.Add(new Transition(nfa.StartState, a.StartState)); - nfa.Transitions.Add(new Transition(nfa.StartState, b.StartState)); + nfa.Transitions.Add(new Transition(nfa.StartState, a.StartState ?? throw new ArgumentException("The NFA must not have a null start state.", nameof(a)))); + nfa.Transitions.Add(new Transition(nfa.StartState, b.StartState ?? throw new ArgumentException("The NFA must not have a null start state.", nameof(b)))); + + // Add a new accept state, link all old accept states to the new accept state with an epsilon link and remove the accept flag + NFA.State newAcceptState = new NFA.State { AcceptState = true }; - // Add a new accept state, link all old accept states to the new accept - // state with an epsilon link and remove the accept flag - var newAcceptState = new NFA.State { AcceptState = true }; - foreach (var oldAcceptState in nfa.States.Where(f => f.AcceptState)) + foreach (NFA.State oldAcceptState in nfa.States.Where(f => f.AcceptState)) { oldAcceptState.AcceptState = false; nfa.Transitions.Add(new Transition(oldAcceptState, newAcceptState)); } + nfa.States.Add(newAcceptState); return nfa; @@ -143,7 +151,7 @@ public static NFA Or(NFA a, NFA b) public static NFA RepeatZeroOrMore(NFA input) { - var nfa = new NFA(); + NFA nfa = new NFA(); // Add everything from the input nfa.AddAll(input); @@ -151,60 +159,63 @@ public static NFA 
RepeatZeroOrMore(NFA input) // Create a new starting state, link it to the old accept state with Epsilon nfa.StartState = new NFA.State(); nfa.States.Add(nfa.StartState); + NFA.State oldAcceptState = input.States.First(f => f.AcceptState); + nfa.Transitions.Add(new Transition(nfa.StartState, oldAcceptState)); // Add epsilon link from old accept state of input to start, to allow for repetition nfa.Transitions.Add(new Transition(oldAcceptState, input.StartState)); // Create new accept state, link old accept state to new accept state with epsilon - var acceptState = new NFA.State { AcceptState = true }; + NFA.State acceptState = new NFA.State { AcceptState = true }; + nfa.States.Add(acceptState); oldAcceptState.AcceptState = false; nfa.Transitions.Add(new Transition(oldAcceptState, acceptState)); + return nfa; } private static NFA RepeatZeroOrOnce(NFA nfa) { - // Easy enough, add an epsilon transition from the start state - // to the end state. Done + // Easy enough, add an epsilon transition from the start state to the end state. Done nfa.Transitions.Add(new Transition(nfa.StartState, nfa.States.First(f => f.AcceptState))); + return nfa; } private static NFA NumberedRepeat(NFA nfa, int minRepetitions, int maxRepetitions) { - // To create a suitable expression, the special case of infinite max repetitions - // must be separately handled. + // To create a suitable expression, the special case of infinite max repetitions must be separately handled. bool infiniteMax = false; + if (maxRepetitions == int.MaxValue) { infiniteMax = true; maxRepetitions = minRepetitions; } else if (maxRepetitions < minRepetitions) - { maxRepetitions = minRepetitions; - } // Copy the NFA max repetitions times, link them together. 
NFA output = nfa.Copy(); - var epsilonLinkStates = new Stack(); + Stack epsilonLinkStates = new Stack(); + for (int i = 1; i < maxRepetitions; ++i) { NFA newNfa = nfa.Copy(); - if (i >= minRepetitions || (infiniteMax && i == maxRepetitions - 1 )) - { + + if (i >= minRepetitions || (infiniteMax && i == maxRepetitions - 1)) epsilonLinkStates.Push(newNfa.StartState); - } + output = And(output, newNfa); } if (infiniteMax) { // Use Single to force an exception if this has gone astray - var finalState = epsilonLinkStates.Single(); + NFA.State finalState = epsilonLinkStates.Single(); // Make a little epsilon loop from the final accept state to the start state of the final state output.Transitions.Add(new Transition(output.States.Single(f => f.AcceptState), finalState)); @@ -212,14 +223,11 @@ private static NFA NumberedRepeat(NFA nfa, int minRepetitions, int maxRepetition else { // Add epsilon transitions from accept to beginning states of NFAs in the chain - var acceptState = output.States.Single(f => f.AcceptState); + NFA.State acceptState = output.States.Single(f => f.AcceptState); + while (epsilonLinkStates.Any()) - { - output.Transitions.Add(new Transition(epsilonLinkStates.Pop(), - acceptState)); - } + output.Transitions.Add(new Transition(epsilonLinkStates.Pop(), acceptState)); } - return output; } diff --git a/Piglet/Lexer/Construction/RegExLexer.cs b/Piglet/Lexer/Construction/RegExLexer.cs index 8cbb3a4..a7a6bdb 100644 --- a/Piglet/Lexer/Construction/RegExLexer.cs +++ b/Piglet/Lexer/Construction/RegExLexer.cs @@ -1,539 +1,543 @@ -using System.Collections.Generic; -using System.IO; -using System.Linq; - -namespace Piglet.Lexer.Construction -{ - internal class RegExLexer - { - private readonly TextReader input; - private State state; - - private enum State - { - Normal, - NormalEscaped, - BeginCharacterClass, - InsideCharacterClass, - RangeEnd, - NumberedRepetition, - InsideCharacterClassEscaped - } - - private class CharacterClassState - { - public 
CharacterClassState() - { - CharsSet = new CharSet(); - } - - public CharSet CharsSet { get; private set; } - public bool Negated { get; set; } - public char LastChar { get; set; } - } - - private class NumberedRepetitionState - { - public NumberedRepetitionState() - { - MinRepetitions = -1; - MaxRepetitions = -1; - Chars = new List(); - } - - public int MaxRepetitions { get; set; } - public int MinRepetitions { get; set; } - public List Chars { get; private set; } - public int CurrentPart { get; set; } - } - - public RegExLexer(TextReader input) - { - this.input = input; - state = State.Normal; - } - - private static readonly char[] escd = new[] - { - '\u0030', '\u0039', '\u0660', '\u0669', '\u06f0', '\u06f9', '\u07c0', '\u07c9', '\u0966', '\u096f', '\u09e6', '\u09ef', - '\u0a66', '\u0a6f', '\u0ae6', '\u0aef', '\u0b66', '\u0b6f', '\u0be6', '\u0bef', '\u0c66', '\u0c6f', '\u0ce6', '\u0cef', - '\u0d66', '\u0d6f', '\u0e50', '\u0e59', '\u0ed0', '\u0ed9', '\u0f20', '\u0f29', '\u1040', '\u1049', '\u1090', '\u1099', - '\u17e0', '\u17e9', '\u1810', '\u1819', '\u1946', '\u194f', '\u19d0', '\u19d9', '\u1b50', '\u1b59', '\u1bb0', '\u1bb9', - '\u1c40', '\u1c49', '\u1c50', '\u1c59', '\ua620', '\ua629', '\ua8d0', '\ua8d9', '\ua900', '\ua909', '\uaa50', '\uaa59', - '\uff10', '\uff19' - }; - - private static readonly char[] escD = new [] - { - '\u0001', '\u002f', '\u003a', '\u065f', '\u066a', '\u06ef', '\u06fa', '\u07bf', '\u07ca', '\u0965', '\u0970', '\u09e5', - '\u09f0', '\u0a65', '\u0a70', '\u0ae5', '\u0af0', '\u0b65', '\u0b70', '\u0be5', '\u0bf0', '\u0c65', '\u0c70', '\u0ce5', - '\u0cf0', '\u0d65', '\u0d70', '\u0e4f', '\u0e5a', '\u0ecf', '\u0eda', '\u0f1f', '\u0f2a', '\u103f', '\u104a', '\u108f', - '\u109a', '\u17df', '\u17ea', '\u180f', '\u181a', '\u1945', '\u1950', '\u19cf', '\u19da', '\u1b4f', '\u1b5a', '\u1baf', - '\u1bba', '\u1c3f', '\u1c4a', '\u1c4f', '\u1c5a', '\ua61f', '\ua62a', '\ua8cf', '\ua8da', '\ua8ff', '\ua90a', '\uaa4f', - '\uaa5a', '\uff0f' - }; - - private 
static readonly char[] escw = new[] - { - '\u0030', '\u0039', '\u0041', '\u005a', '\u005f', '\u005f', '\u0061', '\u007a', '\u00aa', '\u00aa', '\u00b5', '\u00b5', - '\u00ba','\u00ba','\u00c0','\u00d6','\u00d8','\u00f6','\u00f8','\u02c1','\u02c6','\u02d1','\u02e0','\u02e4', - '\u02ec','\u02ec','\u02ee','\u02ee','\u0300','\u0374','\u0376','\u0377','\u037a','\u037d','\u0386','\u0386', - '\u0388','\u038a','\u038c','\u038c','\u038e','\u03a1','\u03a3','\u03f5','\u03f7','\u0481','\u0483','\u0487', - '\u048a','\u0523','\u0531','\u0556','\u0559','\u0559','\u0561','\u0587','\u0591','\u05bd','\u05bf','\u05bf', - '\u05c1','\u05c2','\u05c4','\u05c5','\u05c7','\u05c7','\u05d0','\u05ea','\u05f0','\u05f2','\u0610','\u061a', - '\u0621','\u065e','\u0660','\u0669','\u066e','\u06d3','\u06d5','\u06dc','\u06df','\u06e8','\u06ea','\u06fc', - '\u06ff','\u06ff','\u0710','\u074a','\u074d','\u07b1','\u07c0','\u07f5','\u07fa','\u07fa','\u0901','\u0902', - '\u0904','\u0939','\u093c','\u093d','\u0941','\u0948','\u094d','\u094d','\u0950','\u0954','\u0958','\u0963', - '\u0966','\u096f','\u0971','\u0972','\u097b','\u097f','\u0981','\u0981','\u0985','\u098c','\u098f','\u0990', - '\u0993','\u09a8','\u09aa','\u09b0','\u09b2','\u09b2','\u09b6','\u09b9','\u09bc','\u09bd','\u09c1','\u09c4', - '\u09cd','\u09ce','\u09dc','\u09dd','\u09df','\u09e3','\u09e6','\u09f1','\u0a01','\u0a02','\u0a05','\u0a0a', - '\u0a0f','\u0a10','\u0a13','\u0a28','\u0a2a','\u0a30','\u0a32','\u0a33','\u0a35','\u0a36','\u0a38','\u0a39', - '\u0a3c','\u0a3c','\u0a41','\u0a42','\u0a47','\u0a48','\u0a4b','\u0a4d','\u0a51','\u0a51','\u0a59','\u0a5c', - '\u0a5e','\u0a5e','\u0a66','\u0a75','\u0a81','\u0a82','\u0a85','\u0a8d','\u0a8f','\u0a91','\u0a93','\u0aa8', - '\u0aaa','\u0ab0','\u0ab2','\u0ab3','\u0ab5','\u0ab9','\u0abc','\u0abd','\u0ac1','\u0ac5','\u0ac7','\u0ac8', - '\u0acd','\u0acd','\u0ad0','\u0ad0','\u0ae0','\u0ae3','\u0ae6','\u0aef','\u0b01','\u0b01','\u0b05','\u0b0c', - 
'\u0b0f','\u0b10','\u0b13','\u0b28','\u0b2a','\u0b30','\u0b32','\u0b33','\u0b35','\u0b39','\u0b3c','\u0b3d', - '\u0b3f','\u0b3f','\u0b41','\u0b44','\u0b4d','\u0b4d','\u0b56','\u0b56','\u0b5c','\u0b5d','\u0b5f','\u0b63', - '\u0b66','\u0b6f','\u0b71','\u0b71','\u0b82','\u0b83','\u0b85','\u0b8a','\u0b8e','\u0b90','\u0b92','\u0b95', - '\u0b99','\u0b9a','\u0b9c','\u0b9c','\u0b9e','\u0b9f','\u0ba3','\u0ba4','\u0ba8','\u0baa','\u0bae','\u0bb9', - '\u0bc0','\u0bc0','\u0bcd','\u0bcd','\u0bd0','\u0bd0','\u0be6','\u0bef','\u0c05','\u0c0c','\u0c0e','\u0c10', - '\u0c12','\u0c28','\u0c2a','\u0c33','\u0c35','\u0c39','\u0c3d','\u0c40','\u0c46','\u0c48','\u0c4a','\u0c4d', - '\u0c55','\u0c56','\u0c58','\u0c59','\u0c60','\u0c63','\u0c66','\u0c6f','\u0c85','\u0c8c','\u0c8e','\u0c90', - '\u0c92','\u0ca8','\u0caa','\u0cb3','\u0cb5','\u0cb9','\u0cbc','\u0cbd','\u0cbf','\u0cbf','\u0cc6','\u0cc6', - '\u0ccc','\u0ccd','\u0cde','\u0cde','\u0ce0','\u0ce3','\u0ce6','\u0cef','\u0d05','\u0d0c','\u0d0e','\u0d10', - '\u0d12','\u0d28','\u0d2a','\u0d39','\u0d3d','\u0d3d','\u0d41','\u0d44','\u0d4d','\u0d4d','\u0d60','\u0d63', - '\u0d66','\u0d6f','\u0d7a','\u0d7f','\u0d85','\u0d96','\u0d9a','\u0db1','\u0db3','\u0dbb','\u0dbd','\u0dbd', - '\u0dc0','\u0dc6','\u0dca','\u0dca','\u0dd2','\u0dd4','\u0dd6','\u0dd6','\u0e01','\u0e3a','\u0e40','\u0e4e', - '\u0e50','\u0e59','\u0e81','\u0e82','\u0e84','\u0e84','\u0e87','\u0e88','\u0e8a','\u0e8a','\u0e8d','\u0e8d', - '\u0e94','\u0e97','\u0e99','\u0e9f','\u0ea1','\u0ea3','\u0ea5','\u0ea5','\u0ea7','\u0ea7','\u0eaa','\u0eab', - '\u0ead','\u0eb9','\u0ebb','\u0ebd','\u0ec0','\u0ec4','\u0ec6','\u0ec6','\u0ec8','\u0ecd','\u0ed0','\u0ed9', - '\u0edc','\u0edd','\u0f00','\u0f00','\u0f18','\u0f19','\u0f20','\u0f29','\u0f35','\u0f35','\u0f37','\u0f37', - '\u0f39','\u0f39','\u0f40','\u0f47','\u0f49','\u0f6c','\u0f71','\u0f7e','\u0f80','\u0f84','\u0f86','\u0f8b', - '\u0f90','\u0f97','\u0f99','\u0fbc','\u0fc6','\u0fc6','\u1000','\u102a','\u102d','\u1030','\u1032','\u1037', - 
'\u1039','\u103a','\u103d','\u1049','\u1050','\u1055','\u1058','\u1061','\u1065','\u1066','\u106e','\u1082', - '\u1085','\u1086','\u108d','\u108e','\u1090','\u1099','\u10a0','\u10c5','\u10d0','\u10fa','\u10fc','\u10fc', - '\u1100','\u1159','\u115f','\u11a2','\u11a8','\u11f9','\u1200','\u1248','\u124a','\u124d','\u1250','\u1256', - '\u1258','\u1258','\u125a','\u125d','\u1260','\u1288','\u128a','\u128d','\u1290','\u12b0','\u12b2','\u12b5', - '\u12b8','\u12be','\u12c0','\u12c0','\u12c2','\u12c5','\u12c8','\u12d6','\u12d8','\u1310','\u1312','\u1315', - '\u1318','\u135a','\u135f','\u135f','\u1380','\u138f','\u13a0','\u13f4','\u1401','\u166c','\u166f','\u1676', - '\u1681','\u169a','\u16a0','\u16ea','\u1700','\u170c','\u170e','\u1714','\u1720','\u1734','\u1740','\u1753', - '\u1760','\u176c','\u176e','\u1770','\u1772','\u1773','\u1780','\u17b3','\u17b7','\u17bd','\u17c6','\u17c6', - '\u17c9','\u17d3','\u17d7','\u17d7','\u17dc','\u17dd','\u17e0','\u17e9','\u180b','\u180d','\u1810','\u1819', - '\u1820','\u1877','\u1880','\u18aa','\u1900','\u191c','\u1920','\u1922','\u1927','\u1928','\u1932','\u1932', - '\u1939','\u193b','\u1946','\u196d','\u1970','\u1974','\u1980','\u19a9','\u19c1','\u19c7','\u19d0','\u19d9', - '\u1a00','\u1a18','\u1b00','\u1b03','\u1b05','\u1b34','\u1b36','\u1b3a','\u1b3c','\u1b3c','\u1b42','\u1b42', - '\u1b45','\u1b4b','\u1b50','\u1b59','\u1b6b','\u1b73','\u1b80','\u1b81','\u1b83','\u1ba0','\u1ba2','\u1ba5', - '\u1ba8','\u1ba9','\u1bae','\u1bb9','\u1c00','\u1c23','\u1c2c','\u1c33','\u1c36','\u1c37','\u1c40','\u1c49', - '\u1c4d','\u1c7d','\u1d00','\u1de6','\u1dfe','\u1f15','\u1f18','\u1f1d','\u1f20','\u1f45','\u1f48','\u1f4d', - '\u1f50','\u1f57','\u1f59','\u1f59','\u1f5b','\u1f5b','\u1f5d','\u1f5d','\u1f5f','\u1f7d','\u1f80','\u1fb4', - '\u1fb6','\u1fbc','\u1fbe','\u1fbe','\u1fc2','\u1fc4','\u1fc6','\u1fcc','\u1fd0','\u1fd3','\u1fd6','\u1fdb', - '\u1fe0','\u1fec','\u1ff2','\u1ff4','\u1ff6','\u1ffc','\u203f','\u2040','\u2054','\u2054','\u2071','\u2071', - 
'\u207f','\u207f','\u2090','\u2094','\u20d0','\u20dc','\u20e1','\u20e1','\u20e5','\u20f0','\u2102','\u2102', - '\u2107','\u2107','\u210a','\u2113','\u2115','\u2115','\u2119','\u211d','\u2124','\u2124','\u2126','\u2126', - '\u2128','\u2128','\u212a','\u212d','\u212f','\u2139','\u213c','\u213f','\u2145','\u2149','\u214e','\u214e', - '\u2183','\u2184','\u2c00','\u2c2e','\u2c30','\u2c5e','\u2c60','\u2c6f','\u2c71','\u2c7d','\u2c80','\u2ce4', - '\u2d00','\u2d25','\u2d30','\u2d65','\u2d6f','\u2d6f','\u2d80','\u2d96','\u2da0','\u2da6','\u2da8','\u2dae', - '\u2db0','\u2db6','\u2db8','\u2dbe','\u2dc0','\u2dc6','\u2dc8','\u2dce','\u2dd0','\u2dd6','\u2dd8','\u2dde', - '\u2de0','\u2dff','\u2e2f','\u2e2f','\u3005','\u3006','\u302a','\u302f','\u3031','\u3035','\u303b','\u303c', - '\u3041','\u3096','\u3099','\u309a','\u309d','\u309f','\u30a1','\u30fa','\u30fc','\u30ff','\u3105','\u312d', - '\u3131','\u318e','\u31a0','\u31b7','\u31f0','\u31ff','\u3400','\u4db5','\u4e00','\u9fc3','\ua000','\ua48c', - '\ua500','\ua60c','\ua610','\ua62b','\ua640','\ua65f','\ua662','\ua66f','\ua67c','\ua67d','\ua67f','\ua697', - '\ua717','\ua71f','\ua722','\ua788','\ua78b','\ua78c','\ua7fb','\ua822','\ua825','\ua826','\ua840','\ua873', - '\ua882','\ua8b3','\ua8c4','\ua8c4','\ua8d0','\ua8d9','\ua900','\ua92d','\ua930','\ua951','\uaa00','\uaa2e', - '\uaa31','\uaa32','\uaa35','\uaa36','\uaa40','\uaa4c','\uaa50','\uaa59','\uac00','\ud7a3','\uf900','\ufa2d', - '\ufa30','\ufa6a','\ufa70','\ufad9','\ufb00','\ufb06','\ufb13','\ufb17','\ufb1d','\ufb28','\ufb2a','\ufb36', - '\ufb38','\ufb3c','\ufb3e','\ufb3e','\ufb40','\ufb41','\ufb43','\ufb44','\ufb46','\ufbb1','\ufbd3','\ufd3d', - '\ufd50','\ufd8f','\ufd92','\ufdc7','\ufdf0','\ufdfb','\ufe00','\ufe0f','\ufe20','\ufe26','\ufe33','\ufe34', - '\ufe4d','\ufe4f','\ufe70','\ufe74','\ufe76','\ufefc','\uff10','\uff19','\uff21','\uff3a','\uff3f','\uff3f', - '\uff41','\uff5a','\uff66','\uffbe','\uffc2','\uffc7','\uffca','\uffcf','\uffd2','\uffd7','\uffda','\uffdc' - }; 
- - private static readonly char[] escW = new [] - { - '\u0001', '\u002f', '\u003a', '\u0040', '\u005b', '\u005e', '\u0060', '\u0060', '\u007b', '\u00a9', '\u00ab', '\u00b4', - '\u00b6','\u00b9','\u00bb','\u00bf','\u00d7','\u00d7','\u00f7','\u00f7','\u02c2','\u02c5','\u02d2','\u02df', - '\u02e5','\u02eb','\u02ed','\u02ed','\u02ef','\u02ff','\u0375','\u0375','\u0378','\u0379','\u037e','\u0385', - '\u0387','\u0387','\u038b','\u038b','\u038d','\u038d','\u03a2','\u03a2','\u03f6','\u03f6','\u0482','\u0482', - '\u0488','\u0489','\u0524','\u0530','\u0557','\u0558','\u055a','\u0560','\u0588','\u0590','\u05be','\u05be', - '\u05c0','\u05c0','\u05c3','\u05c3','\u05c6','\u05c6','\u05c8','\u05cf','\u05eb','\u05ef','\u05f3','\u060f', - '\u061b','\u0620','\u065f','\u065f','\u066a','\u066d','\u06d4','\u06d4','\u06dd','\u06de','\u06e9','\u06e9', - '\u06fd','\u06fe','\u0700','\u070f','\u074b','\u074c','\u07b2','\u07bf','\u07f6','\u07f9','\u07fb','\u0900', - '\u0903','\u0903','\u093a','\u093b','\u093e','\u0940','\u0949','\u094c','\u094e','\u094f','\u0955','\u0957', - '\u0964','\u0965','\u0970','\u0970','\u0973','\u097a','\u0980','\u0980','\u0982','\u0984','\u098d','\u098e', - '\u0991','\u0992','\u09a9','\u09a9','\u09b1','\u09b1','\u09b3','\u09b5','\u09ba','\u09bb','\u09be','\u09c0', - '\u09c5','\u09cc','\u09cf','\u09db','\u09de','\u09de','\u09e4','\u09e5','\u09f2','\u0a00','\u0a03','\u0a04', - '\u0a0b','\u0a0e','\u0a11','\u0a12','\u0a29','\u0a29','\u0a31','\u0a31','\u0a34','\u0a34','\u0a37','\u0a37', - '\u0a3a','\u0a3b','\u0a3d','\u0a40','\u0a43','\u0a46','\u0a49','\u0a4a','\u0a4e','\u0a50','\u0a52','\u0a58', - '\u0a5d','\u0a5d','\u0a5f','\u0a65','\u0a76','\u0a80','\u0a83','\u0a84','\u0a8e','\u0a8e','\u0a92','\u0a92', - '\u0aa9','\u0aa9','\u0ab1','\u0ab1','\u0ab4','\u0ab4','\u0aba','\u0abb','\u0abe','\u0ac0','\u0ac6','\u0ac6', - '\u0ac9','\u0acc','\u0ace','\u0acf','\u0ad1','\u0adf','\u0ae4','\u0ae5','\u0af0','\u0b00','\u0b02','\u0b04', - 
'\u0b0d','\u0b0e','\u0b11','\u0b12','\u0b29','\u0b29','\u0b31','\u0b31','\u0b34','\u0b34','\u0b3a','\u0b3b', - '\u0b3e','\u0b3e','\u0b40','\u0b40','\u0b45','\u0b4c','\u0b4e','\u0b55','\u0b57','\u0b5b','\u0b5e','\u0b5e', - '\u0b64','\u0b65','\u0b70','\u0b70','\u0b72','\u0b81','\u0b84','\u0b84','\u0b8b','\u0b8d','\u0b91','\u0b91', - '\u0b96','\u0b98','\u0b9b','\u0b9b','\u0b9d','\u0b9d','\u0ba0','\u0ba2','\u0ba5','\u0ba7','\u0bab','\u0bad', - '\u0bba','\u0bbf','\u0bc1','\u0bcc','\u0bce','\u0bcf','\u0bd1','\u0be5','\u0bf0','\u0c04','\u0c0d','\u0c0d', - '\u0c11','\u0c11','\u0c29','\u0c29','\u0c34','\u0c34','\u0c3a','\u0c3c','\u0c41','\u0c45','\u0c49','\u0c49', - '\u0c4e','\u0c54','\u0c57','\u0c57','\u0c5a','\u0c5f','\u0c64','\u0c65','\u0c70','\u0c84','\u0c8d','\u0c8d', - '\u0c91','\u0c91','\u0ca9','\u0ca9','\u0cb4','\u0cb4','\u0cba','\u0cbb','\u0cbe','\u0cbe','\u0cc0','\u0cc5', - '\u0cc7','\u0ccb','\u0cce','\u0cdd','\u0cdf','\u0cdf','\u0ce4','\u0ce5','\u0cf0','\u0d04','\u0d0d','\u0d0d', - '\u0d11','\u0d11','\u0d29','\u0d29','\u0d3a','\u0d3c','\u0d3e','\u0d40','\u0d45','\u0d4c','\u0d4e','\u0d5f', - '\u0d64','\u0d65','\u0d70','\u0d79','\u0d80','\u0d84','\u0d97','\u0d99','\u0db2','\u0db2','\u0dbc','\u0dbc', - '\u0dbe','\u0dbf','\u0dc7','\u0dc9','\u0dcb','\u0dd1','\u0dd5','\u0dd5','\u0dd7','\u0e00','\u0e3b','\u0e3f', - '\u0e4f','\u0e4f','\u0e5a','\u0e80','\u0e83','\u0e83','\u0e85','\u0e86','\u0e89','\u0e89','\u0e8b','\u0e8c', - '\u0e8e','\u0e93','\u0e98','\u0e98','\u0ea0','\u0ea0','\u0ea4','\u0ea4','\u0ea6','\u0ea6','\u0ea8','\u0ea9', - '\u0eac','\u0eac','\u0eba','\u0eba','\u0ebe','\u0ebf','\u0ec5','\u0ec5','\u0ec7','\u0ec7','\u0ece','\u0ecf', - '\u0eda','\u0edb','\u0ede','\u0eff','\u0f01','\u0f17','\u0f1a','\u0f1f','\u0f2a','\u0f34','\u0f36','\u0f36', - '\u0f38','\u0f38','\u0f3a','\u0f3f','\u0f48','\u0f48','\u0f6d','\u0f70','\u0f7f','\u0f7f','\u0f85','\u0f85', - '\u0f8c','\u0f8f','\u0f98','\u0f98','\u0fbd','\u0fc5','\u0fc7','\u0fff','\u102b','\u102c','\u1031','\u1031', - 
'\u1038','\u1038','\u103b','\u103c','\u104a','\u104f','\u1056','\u1057','\u1062','\u1064','\u1067','\u106d', - '\u1083','\u1084','\u1087','\u108c','\u108f','\u108f','\u109a','\u109f','\u10c6','\u10cf','\u10fb','\u10fb', - '\u10fd','\u10ff','\u115a','\u115e','\u11a3','\u11a7','\u11fa','\u11ff','\u1249','\u1249','\u124e','\u124f', - '\u1257','\u1257','\u1259','\u1259','\u125e','\u125f','\u1289','\u1289','\u128e','\u128f','\u12b1','\u12b1', - '\u12b6','\u12b7','\u12bf','\u12bf','\u12c1','\u12c1','\u12c6','\u12c7','\u12d7','\u12d7','\u1311','\u1311', - '\u1316','\u1317','\u135b','\u135e','\u1360','\u137f','\u1390','\u139f','\u13f5','\u1400','\u166d','\u166e', - '\u1677','\u1680','\u169b','\u169f','\u16eb','\u16ff','\u170d','\u170d','\u1715','\u171f','\u1735','\u173f', - '\u1754','\u175f','\u176d','\u176d','\u1771','\u1771','\u1774','\u177f','\u17b4','\u17b6','\u17be','\u17c5', - '\u17c7','\u17c8','\u17d4','\u17d6','\u17d8','\u17db','\u17de','\u17df','\u17ea','\u180a','\u180e','\u180f', - '\u181a','\u181f','\u1878','\u187f','\u18ab','\u18ff','\u191d','\u191f','\u1923','\u1926','\u1929','\u1931', - '\u1933','\u1938','\u193c','\u1945','\u196e','\u196f','\u1975','\u197f','\u19aa','\u19c0','\u19c8','\u19cf', - '\u19da','\u19ff','\u1a19','\u1aff','\u1b04','\u1b04','\u1b35','\u1b35','\u1b3b','\u1b3b','\u1b3d','\u1b41', - '\u1b43','\u1b44','\u1b4c','\u1b4f','\u1b5a','\u1b6a','\u1b74','\u1b7f','\u1b82','\u1b82','\u1ba1','\u1ba1', - '\u1ba6','\u1ba7','\u1baa','\u1bad','\u1bba','\u1bff','\u1c24','\u1c2b','\u1c34','\u1c35','\u1c38','\u1c3f', - '\u1c4a','\u1c4c','\u1c7e','\u1cff','\u1de7','\u1dfd','\u1f16','\u1f17','\u1f1e','\u1f1f','\u1f46','\u1f47', - '\u1f4e','\u1f4f','\u1f58','\u1f58','\u1f5a','\u1f5a','\u1f5c','\u1f5c','\u1f5e','\u1f5e','\u1f7e','\u1f7f', - '\u1fb5','\u1fb5','\u1fbd','\u1fbd','\u1fbf','\u1fc1','\u1fc5','\u1fc5','\u1fcd','\u1fcf','\u1fd4','\u1fd5', - '\u1fdc','\u1fdf','\u1fed','\u1ff1','\u1ff5','\u1ff5','\u1ffd','\u203e','\u2041','\u2053','\u2055','\u2070', - 
'\u2072','\u207e','\u2080','\u208f','\u2095','\u20cf','\u20dd','\u20e0','\u20e2','\u20e4','\u20f1','\u2101', - '\u2103','\u2106','\u2108','\u2109','\u2114','\u2114','\u2116','\u2118','\u211e','\u2123','\u2125','\u2125', - '\u2127','\u2127','\u2129','\u2129','\u212e','\u212e','\u213a','\u213b','\u2140','\u2144','\u214a','\u214d', - '\u214f','\u2182','\u2185','\u2bff','\u2c2f','\u2c2f','\u2c5f','\u2c5f','\u2c70','\u2c70','\u2c7e','\u2c7f', - '\u2ce5','\u2cff','\u2d26','\u2d2f','\u2d66','\u2d6e','\u2d70','\u2d7f','\u2d97','\u2d9f','\u2da7','\u2da7', - '\u2daf','\u2daf','\u2db7','\u2db7','\u2dbf','\u2dbf','\u2dc7','\u2dc7','\u2dcf','\u2dcf','\u2dd7','\u2dd7', - '\u2ddf','\u2ddf','\u2e00','\u2e2e','\u2e30','\u3004','\u3007','\u3029','\u3030','\u3030','\u3036','\u303a', - '\u303d','\u3040','\u3097','\u3098','\u309b','\u309c','\u30a0','\u30a0','\u30fb','\u30fb','\u3100','\u3104', - '\u312e','\u3130','\u318f','\u319f','\u31b8','\u31ef','\u3200','\u33ff','\u4db6','\u4dff','\u9fc4','\u9fff', - '\ua48d','\ua4ff','\ua60d','\ua60f','\ua62c','\ua63f','\ua660','\ua661','\ua670','\ua67b','\ua67e','\ua67e', - '\ua698','\ua716','\ua720','\ua721','\ua789','\ua78a','\ua78d','\ua7fa','\ua823','\ua824','\ua827','\ua83f', - '\ua874','\ua881','\ua8b4','\ua8c3','\ua8c5','\ua8cf','\ua8da','\ua8ff','\ua92e','\ua92f','\ua952','\ua9ff', - '\uaa2f','\uaa30','\uaa33','\uaa34','\uaa37','\uaa3f','\uaa4d','\uaa4f','\uaa5a','\uabff','\ud7a4','\uf8ff', - '\ufa2e','\ufa2f','\ufa6b','\ufa6f','\ufada','\ufaff','\ufb07','\ufb12','\ufb18','\ufb1c','\ufb29','\ufb29', - '\ufb37','\ufb37','\ufb3d','\ufb3d','\ufb3f','\ufb3f','\ufb42','\ufb42','\ufb45','\ufb45','\ufbb2','\ufbd2', - '\ufd3e','\ufd4f','\ufd90','\ufd91','\ufdc8','\ufdef','\ufdfc','\ufdff','\ufe10','\ufe1f','\ufe27','\ufe32', - '\ufe35','\ufe4c','\ufe50','\ufe6f','\ufe75','\ufe75','\ufefd','\uff0f','\uff1a','\uff20','\uff3b','\uff3e', - '\uff40','\uff40','\uff5b','\uff65','\uffbf','\uffc1','\uffc8','\uffc9','\uffd0','\uffd1','\uffd8','\uffd9' - }; 
- - private CharSet EscapedCharToAcceptCharRange(char c) - { - switch (c) - { - // A lot of these are REALLY funky numbers. Tibetan numbers and such. You name it - case 'd': - return new CharSet(false, escd); - // Shorthand for [^0-9] - case 'D': - return new CharSet(false, escD); - case 's': - return AllWhitespaceCharacters; - case 'S': - return AllCharactersExceptNull.Except(AllWhitespaceCharacters); - case 'w': - return new CharSet(false, escw); - case 'W': - return new CharSet(false, escW); - case 'n': - return SingleChar('\n'); - case 'r': - return SingleChar('\r'); - case '.': - case '*': - case '|': - case '[': - case ']': - case '+': - case '(': - case ')': - case '\\': - case '{': - case '}': - case ' ': - case '?': - return SingleChar(c); - default: - return new CharSet(); // Empty charset, might be added to - } - } - - private CharSet SingleChar(char c) - { - var cs = new CharSet(); - cs.Add(c); - return cs; - } - - private CharSet EscapedCharToAcceptCharsInClass(char c) - { - // There are some additional escapeable characters for a character class - switch (c) - { - case '-': - case '^': - return SingleChar(c); - - } - return EscapedCharToAcceptCharRange(c); - } - - public RegExToken NextToken() - { - // These keeps track of classes - var classState = new CharacterClassState(); - var numberedRepetitionState = new NumberedRepetitionState(); - state = State.Normal; - - while (input.Peek() != -1) - { - var c = (char)input.Read(); - - switch (state) - { - case State.Normal: - switch (c) - { - case '\\': - state = State.NormalEscaped; - break; - case '[': - state = State.BeginCharacterClass; - break; - case '{': - state = State.NumberedRepetition; - break; - - case '(': return new RegExToken { Type = RegExToken.TokenType.OperatorOpenParanthesis }; - case ')': return new RegExToken { Type = RegExToken.TokenType.OperatorCloseParanthesis }; - case '|': return new RegExToken { Type = RegExToken.TokenType.OperatorOr }; - case '+': return new RegExToken { Type = 
RegExToken.TokenType.OperatorPlus }; - case '*': return new RegExToken { Type = RegExToken.TokenType.OperatorMul }; - case '?': return new RegExToken { Type = RegExToken.TokenType.OperatorQuestion }; - case '.': return new RegExToken { Type = RegExToken.TokenType.Accept, Characters = AllCharactersExceptNull }; - default: return new RegExToken { Type = RegExToken.TokenType.Accept, Characters = SingleChar(c)}; - } - break; - - case State.NormalEscaped: - { - var characters = EscapedCharToAcceptCharRange(c); - if (!characters.Any()) - { - throw new LexerConstructionException(string.Format("Unknown escaped character '{0}'", c)); - } - return new RegExToken {Characters = characters, Type = RegExToken.TokenType.Accept}; - } - - case State.BeginCharacterClass: - switch (c) - { - case '^': - if (classState.Negated) - { - // If the classstate is ALREADY negated - // Readd the ^ to the expression - classState.LastChar = '^'; - state = State.InsideCharacterClass; - } - classState.Negated = true; - break; - case '[': - case ']': - case '-': - // This does not break the character class TODO: I THINK!!! - classState.LastChar = c; - break; - case '\\': - state = State.InsideCharacterClassEscaped; - break; - default: - classState.LastChar = c; - state = State.InsideCharacterClass; - break; - } - break; - - case State.InsideCharacterClass: - switch (c) - { - case '-': - state = State.RangeEnd; - break; - case '[': - throw new LexerConstructionException("Opening new character class inside an already open one"); - case ']': - if (classState.LastChar != (char)0) - classState.CharsSet.Add(classState.LastChar); - - // Ending class - return new RegExToken - { - Type = RegExToken.TokenType.Accept, - Characters = classState.Negated - ? 
AllCharactersExceptNull.Except(classState.CharsSet) - : classState.CharsSet - }; - case '\\': - state = State.InsideCharacterClassEscaped; - break; - default: - if (classState.LastChar != 0) - classState.CharsSet.Add(classState.LastChar); - classState.LastChar = c; - break; - } - break; - - case State.InsideCharacterClassEscaped: - { - var characters = EscapedCharToAcceptCharsInClass(c); - if (!characters.Any()) - { - throw new LexerConstructionException(string.Format("Unknown escaped character '{0}' in character class", c)); - } - - if (classState.LastChar != 0) - classState.CharsSet.Add(classState.LastChar); - - classState.CharsSet.UnionWith(characters); - classState.LastChar = (char)0; - state = State.InsideCharacterClass; - } - break; - - - case State.RangeEnd: - switch (c) - { - case ']': - // We found the - at the position BEFORE the end of the class - // which means we should handle it as a litteral and end the class - classState.CharsSet.Add(classState.LastChar); - classState.CharsSet.Add('-'); - - return new RegExToken - { - Type = RegExToken.TokenType.Accept, - Characters = classState.Negated - ? AllCharactersExceptNull.Except(classState.CharsSet) - : classState.CharsSet - }; - - default: - char lastClassChar = classState.LastChar; - char from = lastClassChar < c ? lastClassChar : c; - char to = lastClassChar < c ? c : lastClassChar; - classState.CharsSet.AddRange(from, to); - classState.LastChar = (char) 0; - state = State.InsideCharacterClass; - break; - } - break; - - case State.NumberedRepetition: - switch (c) - { - case '0': // Is it really OK to start with a 0. It is now. 
- case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - numberedRepetitionState.Chars.Add(c); - break; - case '}': - case ':': - case ',': - // Parse whatever is in Chars - int reps; - - // Number is required in FIRST part but OPTIONAL in the second - if (numberedRepetitionState.Chars.Any() || numberedRepetitionState.CurrentPart == 0) - { - if (!int.TryParse(new string(numberedRepetitionState.Chars.ToArray()), out reps)) - { - throw new LexerConstructionException("Numbered repetition operator contains operand that is not a number"); - } - } - else - { - // End up here when nothing specified in the last part. - // Use the max value to say that it can be infinite numbers. - reps = int.MaxValue; - } - numberedRepetitionState.Chars.Clear(); - - // Set the right value - if (numberedRepetitionState.CurrentPart == 0) - { - numberedRepetitionState.MinRepetitions = reps; - } - else - { - numberedRepetitionState.MaxRepetitions = reps; - } - - if (c == ':' || c == ',') - { - ++numberedRepetitionState.CurrentPart; - if (numberedRepetitionState.CurrentPart > 1) - throw new LexerConstructionException("More than one , in numbered repetition."); - } - else - { - return new RegExToken - { - Type = RegExToken.TokenType.NumberedRepeat, - MinRepetitions = numberedRepetitionState.MinRepetitions, - MaxRepetitions = numberedRepetitionState.MaxRepetitions - }; - } - break; - default: - throw new LexerConstructionException( - string.Format("Illegal character {0} in numbered repetition", c)); - } - break; - } - } - - // We get here if we try to lex when the expression has ended. 
- return null; - } - - private static CharSet CharRange(char start, char end) - { - var charRange = new CharSet(); - charRange.AddRange(start, end); - return charRange; - } - - protected static CharSet AllCharactersExceptNull - { - get - { - return CharRange((char) 1, char.MaxValue); - } - } - - protected static CharSet AllWhitespaceCharacters - { - get - { - return new CharSet(false, '\u0009', '\u000d', '\u0020', '\u0020', '\u0085', '\u0085', '\u00a0', '\u00a0', '\u1680', '\u1680', '\u180e', '\u180e', - '\u2000', '\u200a', '\u2028', '\u2029', '\u202f', '\u202f', '\u205f', '\u205f', '\u3000', '\u3000'); - } - } - } -} +using System.Collections.Generic; +using System.Linq; +using System.IO; + +namespace Piglet.Lexer.Construction +{ + internal sealed class RegexLexer + { + private static readonly char[] escd = new[] + { + '\u0030', '\u0039', '\u0660', '\u0669', '\u06f0', '\u06f9', '\u07c0', '\u07c9', '\u0966', '\u096f', '\u09e6', '\u09ef', + '\u0a66', '\u0a6f', '\u0ae6', '\u0aef', '\u0b66', '\u0b6f', '\u0be6', '\u0bef', '\u0c66', '\u0c6f', '\u0ce6', '\u0cef', + '\u0d66', '\u0d6f', '\u0e50', '\u0e59', '\u0ed0', '\u0ed9', '\u0f20', '\u0f29', '\u1040', '\u1049', '\u1090', '\u1099', + '\u17e0', '\u17e9', '\u1810', '\u1819', '\u1946', '\u194f', '\u19d0', '\u19d9', '\u1b50', '\u1b59', '\u1bb0', '\u1bb9', + '\u1c40', '\u1c49', '\u1c50', '\u1c59', '\ua620', '\ua629', '\ua8d0', '\ua8d9', '\ua900', '\ua909', '\uaa50', '\uaa59', + '\uff10', '\uff19' + }; + private static readonly char[] escD = new[] + { + '\u0001', '\u002f', '\u003a', '\u065f', '\u066a', '\u06ef', '\u06fa', '\u07bf', '\u07ca', '\u0965', '\u0970', '\u09e5', + '\u09f0', '\u0a65', '\u0a70', '\u0ae5', '\u0af0', '\u0b65', '\u0b70', '\u0be5', '\u0bf0', '\u0c65', '\u0c70', '\u0ce5', + '\u0cf0', '\u0d65', '\u0d70', '\u0e4f', '\u0e5a', '\u0ecf', '\u0eda', '\u0f1f', '\u0f2a', '\u103f', '\u104a', '\u108f', + '\u109a', '\u17df', '\u17ea', '\u180f', '\u181a', '\u1945', '\u1950', '\u19cf', '\u19da', '\u1b4f', '\u1b5a', 
'\u1baf', + '\u1bba', '\u1c3f', '\u1c4a', '\u1c4f', '\u1c5a', '\ua61f', '\ua62a', '\ua8cf', '\ua8da', '\ua8ff', '\ua90a', '\uaa4f', + '\uaa5a', '\uff0f' + }; + private static readonly char[] escw = new[] + { + '\u0030', '\u0039', '\u0041', '\u005a', '\u005f', '\u005f', '\u0061', '\u007a', '\u00aa', '\u00aa', '\u00b5', '\u00b5', + '\u00ba','\u00ba','\u00c0','\u00d6','\u00d8','\u00f6','\u00f8','\u02c1','\u02c6','\u02d1','\u02e0','\u02e4', + '\u02ec','\u02ec','\u02ee','\u02ee','\u0300','\u0374','\u0376','\u0377','\u037a','\u037d','\u0386','\u0386', + '\u0388','\u038a','\u038c','\u038c','\u038e','\u03a1','\u03a3','\u03f5','\u03f7','\u0481','\u0483','\u0487', + '\u048a','\u0523','\u0531','\u0556','\u0559','\u0559','\u0561','\u0587','\u0591','\u05bd','\u05bf','\u05bf', + '\u05c1','\u05c2','\u05c4','\u05c5','\u05c7','\u05c7','\u05d0','\u05ea','\u05f0','\u05f2','\u0610','\u061a', + '\u0621','\u065e','\u0660','\u0669','\u066e','\u06d3','\u06d5','\u06dc','\u06df','\u06e8','\u06ea','\u06fc', + '\u06ff','\u06ff','\u0710','\u074a','\u074d','\u07b1','\u07c0','\u07f5','\u07fa','\u07fa','\u0901','\u0902', + '\u0904','\u0939','\u093c','\u093d','\u0941','\u0948','\u094d','\u094d','\u0950','\u0954','\u0958','\u0963', + '\u0966','\u096f','\u0971','\u0972','\u097b','\u097f','\u0981','\u0981','\u0985','\u098c','\u098f','\u0990', + '\u0993','\u09a8','\u09aa','\u09b0','\u09b2','\u09b2','\u09b6','\u09b9','\u09bc','\u09bd','\u09c1','\u09c4', + '\u09cd','\u09ce','\u09dc','\u09dd','\u09df','\u09e3','\u09e6','\u09f1','\u0a01','\u0a02','\u0a05','\u0a0a', + '\u0a0f','\u0a10','\u0a13','\u0a28','\u0a2a','\u0a30','\u0a32','\u0a33','\u0a35','\u0a36','\u0a38','\u0a39', + '\u0a3c','\u0a3c','\u0a41','\u0a42','\u0a47','\u0a48','\u0a4b','\u0a4d','\u0a51','\u0a51','\u0a59','\u0a5c', + '\u0a5e','\u0a5e','\u0a66','\u0a75','\u0a81','\u0a82','\u0a85','\u0a8d','\u0a8f','\u0a91','\u0a93','\u0aa8', + '\u0aaa','\u0ab0','\u0ab2','\u0ab3','\u0ab5','\u0ab9','\u0abc','\u0abd','\u0ac1','\u0ac5','\u0ac7','\u0ac8', + 
'\u0acd','\u0acd','\u0ad0','\u0ad0','\u0ae0','\u0ae3','\u0ae6','\u0aef','\u0b01','\u0b01','\u0b05','\u0b0c', + '\u0b0f','\u0b10','\u0b13','\u0b28','\u0b2a','\u0b30','\u0b32','\u0b33','\u0b35','\u0b39','\u0b3c','\u0b3d', + '\u0b3f','\u0b3f','\u0b41','\u0b44','\u0b4d','\u0b4d','\u0b56','\u0b56','\u0b5c','\u0b5d','\u0b5f','\u0b63', + '\u0b66','\u0b6f','\u0b71','\u0b71','\u0b82','\u0b83','\u0b85','\u0b8a','\u0b8e','\u0b90','\u0b92','\u0b95', + '\u0b99','\u0b9a','\u0b9c','\u0b9c','\u0b9e','\u0b9f','\u0ba3','\u0ba4','\u0ba8','\u0baa','\u0bae','\u0bb9', + '\u0bc0','\u0bc0','\u0bcd','\u0bcd','\u0bd0','\u0bd0','\u0be6','\u0bef','\u0c05','\u0c0c','\u0c0e','\u0c10', + '\u0c12','\u0c28','\u0c2a','\u0c33','\u0c35','\u0c39','\u0c3d','\u0c40','\u0c46','\u0c48','\u0c4a','\u0c4d', + '\u0c55','\u0c56','\u0c58','\u0c59','\u0c60','\u0c63','\u0c66','\u0c6f','\u0c85','\u0c8c','\u0c8e','\u0c90', + '\u0c92','\u0ca8','\u0caa','\u0cb3','\u0cb5','\u0cb9','\u0cbc','\u0cbd','\u0cbf','\u0cbf','\u0cc6','\u0cc6', + '\u0ccc','\u0ccd','\u0cde','\u0cde','\u0ce0','\u0ce3','\u0ce6','\u0cef','\u0d05','\u0d0c','\u0d0e','\u0d10', + '\u0d12','\u0d28','\u0d2a','\u0d39','\u0d3d','\u0d3d','\u0d41','\u0d44','\u0d4d','\u0d4d','\u0d60','\u0d63', + '\u0d66','\u0d6f','\u0d7a','\u0d7f','\u0d85','\u0d96','\u0d9a','\u0db1','\u0db3','\u0dbb','\u0dbd','\u0dbd', + '\u0dc0','\u0dc6','\u0dca','\u0dca','\u0dd2','\u0dd4','\u0dd6','\u0dd6','\u0e01','\u0e3a','\u0e40','\u0e4e', + '\u0e50','\u0e59','\u0e81','\u0e82','\u0e84','\u0e84','\u0e87','\u0e88','\u0e8a','\u0e8a','\u0e8d','\u0e8d', + '\u0e94','\u0e97','\u0e99','\u0e9f','\u0ea1','\u0ea3','\u0ea5','\u0ea5','\u0ea7','\u0ea7','\u0eaa','\u0eab', + '\u0ead','\u0eb9','\u0ebb','\u0ebd','\u0ec0','\u0ec4','\u0ec6','\u0ec6','\u0ec8','\u0ecd','\u0ed0','\u0ed9', + '\u0edc','\u0edd','\u0f00','\u0f00','\u0f18','\u0f19','\u0f20','\u0f29','\u0f35','\u0f35','\u0f37','\u0f37', + '\u0f39','\u0f39','\u0f40','\u0f47','\u0f49','\u0f6c','\u0f71','\u0f7e','\u0f80','\u0f84','\u0f86','\u0f8b', + 
'\u0f90','\u0f97','\u0f99','\u0fbc','\u0fc6','\u0fc6','\u1000','\u102a','\u102d','\u1030','\u1032','\u1037', + '\u1039','\u103a','\u103d','\u1049','\u1050','\u1055','\u1058','\u1061','\u1065','\u1066','\u106e','\u1082', + '\u1085','\u1086','\u108d','\u108e','\u1090','\u1099','\u10a0','\u10c5','\u10d0','\u10fa','\u10fc','\u10fc', + '\u1100','\u1159','\u115f','\u11a2','\u11a8','\u11f9','\u1200','\u1248','\u124a','\u124d','\u1250','\u1256', + '\u1258','\u1258','\u125a','\u125d','\u1260','\u1288','\u128a','\u128d','\u1290','\u12b0','\u12b2','\u12b5', + '\u12b8','\u12be','\u12c0','\u12c0','\u12c2','\u12c5','\u12c8','\u12d6','\u12d8','\u1310','\u1312','\u1315', + '\u1318','\u135a','\u135f','\u135f','\u1380','\u138f','\u13a0','\u13f4','\u1401','\u166c','\u166f','\u1676', + '\u1681','\u169a','\u16a0','\u16ea','\u1700','\u170c','\u170e','\u1714','\u1720','\u1734','\u1740','\u1753', + '\u1760','\u176c','\u176e','\u1770','\u1772','\u1773','\u1780','\u17b3','\u17b7','\u17bd','\u17c6','\u17c6', + '\u17c9','\u17d3','\u17d7','\u17d7','\u17dc','\u17dd','\u17e0','\u17e9','\u180b','\u180d','\u1810','\u1819', + '\u1820','\u1877','\u1880','\u18aa','\u1900','\u191c','\u1920','\u1922','\u1927','\u1928','\u1932','\u1932', + '\u1939','\u193b','\u1946','\u196d','\u1970','\u1974','\u1980','\u19a9','\u19c1','\u19c7','\u19d0','\u19d9', + '\u1a00','\u1a18','\u1b00','\u1b03','\u1b05','\u1b34','\u1b36','\u1b3a','\u1b3c','\u1b3c','\u1b42','\u1b42', + '\u1b45','\u1b4b','\u1b50','\u1b59','\u1b6b','\u1b73','\u1b80','\u1b81','\u1b83','\u1ba0','\u1ba2','\u1ba5', + '\u1ba8','\u1ba9','\u1bae','\u1bb9','\u1c00','\u1c23','\u1c2c','\u1c33','\u1c36','\u1c37','\u1c40','\u1c49', + '\u1c4d','\u1c7d','\u1d00','\u1de6','\u1dfe','\u1f15','\u1f18','\u1f1d','\u1f20','\u1f45','\u1f48','\u1f4d', + '\u1f50','\u1f57','\u1f59','\u1f59','\u1f5b','\u1f5b','\u1f5d','\u1f5d','\u1f5f','\u1f7d','\u1f80','\u1fb4', + '\u1fb6','\u1fbc','\u1fbe','\u1fbe','\u1fc2','\u1fc4','\u1fc6','\u1fcc','\u1fd0','\u1fd3','\u1fd6','\u1fdb', + 
'\u1fe0','\u1fec','\u1ff2','\u1ff4','\u1ff6','\u1ffc','\u203f','\u2040','\u2054','\u2054','\u2071','\u2071', + '\u207f','\u207f','\u2090','\u2094','\u20d0','\u20dc','\u20e1','\u20e1','\u20e5','\u20f0','\u2102','\u2102', + '\u2107','\u2107','\u210a','\u2113','\u2115','\u2115','\u2119','\u211d','\u2124','\u2124','\u2126','\u2126', + '\u2128','\u2128','\u212a','\u212d','\u212f','\u2139','\u213c','\u213f','\u2145','\u2149','\u214e','\u214e', + '\u2183','\u2184','\u2c00','\u2c2e','\u2c30','\u2c5e','\u2c60','\u2c6f','\u2c71','\u2c7d','\u2c80','\u2ce4', + '\u2d00','\u2d25','\u2d30','\u2d65','\u2d6f','\u2d6f','\u2d80','\u2d96','\u2da0','\u2da6','\u2da8','\u2dae', + '\u2db0','\u2db6','\u2db8','\u2dbe','\u2dc0','\u2dc6','\u2dc8','\u2dce','\u2dd0','\u2dd6','\u2dd8','\u2dde', + '\u2de0','\u2dff','\u2e2f','\u2e2f','\u3005','\u3006','\u302a','\u302f','\u3031','\u3035','\u303b','\u303c', + '\u3041','\u3096','\u3099','\u309a','\u309d','\u309f','\u30a1','\u30fa','\u30fc','\u30ff','\u3105','\u312d', + '\u3131','\u318e','\u31a0','\u31b7','\u31f0','\u31ff','\u3400','\u4db5','\u4e00','\u9fc3','\ua000','\ua48c', + '\ua500','\ua60c','\ua610','\ua62b','\ua640','\ua65f','\ua662','\ua66f','\ua67c','\ua67d','\ua67f','\ua697', + '\ua717','\ua71f','\ua722','\ua788','\ua78b','\ua78c','\ua7fb','\ua822','\ua825','\ua826','\ua840','\ua873', + '\ua882','\ua8b3','\ua8c4','\ua8c4','\ua8d0','\ua8d9','\ua900','\ua92d','\ua930','\ua951','\uaa00','\uaa2e', + '\uaa31','\uaa32','\uaa35','\uaa36','\uaa40','\uaa4c','\uaa50','\uaa59','\uac00','\ud7a3','\uf900','\ufa2d', + '\ufa30','\ufa6a','\ufa70','\ufad9','\ufb00','\ufb06','\ufb13','\ufb17','\ufb1d','\ufb28','\ufb2a','\ufb36', + '\ufb38','\ufb3c','\ufb3e','\ufb3e','\ufb40','\ufb41','\ufb43','\ufb44','\ufb46','\ufbb1','\ufbd3','\ufd3d', + '\ufd50','\ufd8f','\ufd92','\ufdc7','\ufdf0','\ufdfb','\ufe00','\ufe0f','\ufe20','\ufe26','\ufe33','\ufe34', + '\ufe4d','\ufe4f','\ufe70','\ufe74','\ufe76','\ufefc','\uff10','\uff19','\uff21','\uff3a','\uff3f','\uff3f', + 
'\uff41','\uff5a','\uff66','\uffbe','\uffc2','\uffc7','\uffca','\uffcf','\uffd2','\uffd7','\uffda','\uffdc' + }; + private static readonly char[] escW = new[] + { + '\u0001', '\u002f', '\u003a', '\u0040', '\u005b', '\u005e', '\u0060', '\u0060', '\u007b', '\u00a9', '\u00ab', '\u00b4', + '\u00b6','\u00b9','\u00bb','\u00bf','\u00d7','\u00d7','\u00f7','\u00f7','\u02c2','\u02c5','\u02d2','\u02df', + '\u02e5','\u02eb','\u02ed','\u02ed','\u02ef','\u02ff','\u0375','\u0375','\u0378','\u0379','\u037e','\u0385', + '\u0387','\u0387','\u038b','\u038b','\u038d','\u038d','\u03a2','\u03a2','\u03f6','\u03f6','\u0482','\u0482', + '\u0488','\u0489','\u0524','\u0530','\u0557','\u0558','\u055a','\u0560','\u0588','\u0590','\u05be','\u05be', + '\u05c0','\u05c0','\u05c3','\u05c3','\u05c6','\u05c6','\u05c8','\u05cf','\u05eb','\u05ef','\u05f3','\u060f', + '\u061b','\u0620','\u065f','\u065f','\u066a','\u066d','\u06d4','\u06d4','\u06dd','\u06de','\u06e9','\u06e9', + '\u06fd','\u06fe','\u0700','\u070f','\u074b','\u074c','\u07b2','\u07bf','\u07f6','\u07f9','\u07fb','\u0900', + '\u0903','\u0903','\u093a','\u093b','\u093e','\u0940','\u0949','\u094c','\u094e','\u094f','\u0955','\u0957', + '\u0964','\u0965','\u0970','\u0970','\u0973','\u097a','\u0980','\u0980','\u0982','\u0984','\u098d','\u098e', + '\u0991','\u0992','\u09a9','\u09a9','\u09b1','\u09b1','\u09b3','\u09b5','\u09ba','\u09bb','\u09be','\u09c0', + '\u09c5','\u09cc','\u09cf','\u09db','\u09de','\u09de','\u09e4','\u09e5','\u09f2','\u0a00','\u0a03','\u0a04', + '\u0a0b','\u0a0e','\u0a11','\u0a12','\u0a29','\u0a29','\u0a31','\u0a31','\u0a34','\u0a34','\u0a37','\u0a37', + '\u0a3a','\u0a3b','\u0a3d','\u0a40','\u0a43','\u0a46','\u0a49','\u0a4a','\u0a4e','\u0a50','\u0a52','\u0a58', + '\u0a5d','\u0a5d','\u0a5f','\u0a65','\u0a76','\u0a80','\u0a83','\u0a84','\u0a8e','\u0a8e','\u0a92','\u0a92', + '\u0aa9','\u0aa9','\u0ab1','\u0ab1','\u0ab4','\u0ab4','\u0aba','\u0abb','\u0abe','\u0ac0','\u0ac6','\u0ac6', + 
'\u0ac9','\u0acc','\u0ace','\u0acf','\u0ad1','\u0adf','\u0ae4','\u0ae5','\u0af0','\u0b00','\u0b02','\u0b04', + '\u0b0d','\u0b0e','\u0b11','\u0b12','\u0b29','\u0b29','\u0b31','\u0b31','\u0b34','\u0b34','\u0b3a','\u0b3b', + '\u0b3e','\u0b3e','\u0b40','\u0b40','\u0b45','\u0b4c','\u0b4e','\u0b55','\u0b57','\u0b5b','\u0b5e','\u0b5e', + '\u0b64','\u0b65','\u0b70','\u0b70','\u0b72','\u0b81','\u0b84','\u0b84','\u0b8b','\u0b8d','\u0b91','\u0b91', + '\u0b96','\u0b98','\u0b9b','\u0b9b','\u0b9d','\u0b9d','\u0ba0','\u0ba2','\u0ba5','\u0ba7','\u0bab','\u0bad', + '\u0bba','\u0bbf','\u0bc1','\u0bcc','\u0bce','\u0bcf','\u0bd1','\u0be5','\u0bf0','\u0c04','\u0c0d','\u0c0d', + '\u0c11','\u0c11','\u0c29','\u0c29','\u0c34','\u0c34','\u0c3a','\u0c3c','\u0c41','\u0c45','\u0c49','\u0c49', + '\u0c4e','\u0c54','\u0c57','\u0c57','\u0c5a','\u0c5f','\u0c64','\u0c65','\u0c70','\u0c84','\u0c8d','\u0c8d', + '\u0c91','\u0c91','\u0ca9','\u0ca9','\u0cb4','\u0cb4','\u0cba','\u0cbb','\u0cbe','\u0cbe','\u0cc0','\u0cc5', + '\u0cc7','\u0ccb','\u0cce','\u0cdd','\u0cdf','\u0cdf','\u0ce4','\u0ce5','\u0cf0','\u0d04','\u0d0d','\u0d0d', + '\u0d11','\u0d11','\u0d29','\u0d29','\u0d3a','\u0d3c','\u0d3e','\u0d40','\u0d45','\u0d4c','\u0d4e','\u0d5f', + '\u0d64','\u0d65','\u0d70','\u0d79','\u0d80','\u0d84','\u0d97','\u0d99','\u0db2','\u0db2','\u0dbc','\u0dbc', + '\u0dbe','\u0dbf','\u0dc7','\u0dc9','\u0dcb','\u0dd1','\u0dd5','\u0dd5','\u0dd7','\u0e00','\u0e3b','\u0e3f', + '\u0e4f','\u0e4f','\u0e5a','\u0e80','\u0e83','\u0e83','\u0e85','\u0e86','\u0e89','\u0e89','\u0e8b','\u0e8c', + '\u0e8e','\u0e93','\u0e98','\u0e98','\u0ea0','\u0ea0','\u0ea4','\u0ea4','\u0ea6','\u0ea6','\u0ea8','\u0ea9', + '\u0eac','\u0eac','\u0eba','\u0eba','\u0ebe','\u0ebf','\u0ec5','\u0ec5','\u0ec7','\u0ec7','\u0ece','\u0ecf', + '\u0eda','\u0edb','\u0ede','\u0eff','\u0f01','\u0f17','\u0f1a','\u0f1f','\u0f2a','\u0f34','\u0f36','\u0f36', + '\u0f38','\u0f38','\u0f3a','\u0f3f','\u0f48','\u0f48','\u0f6d','\u0f70','\u0f7f','\u0f7f','\u0f85','\u0f85', + 
'\u0f8c','\u0f8f','\u0f98','\u0f98','\u0fbd','\u0fc5','\u0fc7','\u0fff','\u102b','\u102c','\u1031','\u1031', + '\u1038','\u1038','\u103b','\u103c','\u104a','\u104f','\u1056','\u1057','\u1062','\u1064','\u1067','\u106d', + '\u1083','\u1084','\u1087','\u108c','\u108f','\u108f','\u109a','\u109f','\u10c6','\u10cf','\u10fb','\u10fb', + '\u10fd','\u10ff','\u115a','\u115e','\u11a3','\u11a7','\u11fa','\u11ff','\u1249','\u1249','\u124e','\u124f', + '\u1257','\u1257','\u1259','\u1259','\u125e','\u125f','\u1289','\u1289','\u128e','\u128f','\u12b1','\u12b1', + '\u12b6','\u12b7','\u12bf','\u12bf','\u12c1','\u12c1','\u12c6','\u12c7','\u12d7','\u12d7','\u1311','\u1311', + '\u1316','\u1317','\u135b','\u135e','\u1360','\u137f','\u1390','\u139f','\u13f5','\u1400','\u166d','\u166e', + '\u1677','\u1680','\u169b','\u169f','\u16eb','\u16ff','\u170d','\u170d','\u1715','\u171f','\u1735','\u173f', + '\u1754','\u175f','\u176d','\u176d','\u1771','\u1771','\u1774','\u177f','\u17b4','\u17b6','\u17be','\u17c5', + '\u17c7','\u17c8','\u17d4','\u17d6','\u17d8','\u17db','\u17de','\u17df','\u17ea','\u180a','\u180e','\u180f', + '\u181a','\u181f','\u1878','\u187f','\u18ab','\u18ff','\u191d','\u191f','\u1923','\u1926','\u1929','\u1931', + '\u1933','\u1938','\u193c','\u1945','\u196e','\u196f','\u1975','\u197f','\u19aa','\u19c0','\u19c8','\u19cf', + '\u19da','\u19ff','\u1a19','\u1aff','\u1b04','\u1b04','\u1b35','\u1b35','\u1b3b','\u1b3b','\u1b3d','\u1b41', + '\u1b43','\u1b44','\u1b4c','\u1b4f','\u1b5a','\u1b6a','\u1b74','\u1b7f','\u1b82','\u1b82','\u1ba1','\u1ba1', + '\u1ba6','\u1ba7','\u1baa','\u1bad','\u1bba','\u1bff','\u1c24','\u1c2b','\u1c34','\u1c35','\u1c38','\u1c3f', + '\u1c4a','\u1c4c','\u1c7e','\u1cff','\u1de7','\u1dfd','\u1f16','\u1f17','\u1f1e','\u1f1f','\u1f46','\u1f47', + '\u1f4e','\u1f4f','\u1f58','\u1f58','\u1f5a','\u1f5a','\u1f5c','\u1f5c','\u1f5e','\u1f5e','\u1f7e','\u1f7f', + '\u1fb5','\u1fb5','\u1fbd','\u1fbd','\u1fbf','\u1fc1','\u1fc5','\u1fc5','\u1fcd','\u1fcf','\u1fd4','\u1fd5', + 
'\u1fdc','\u1fdf','\u1fed','\u1ff1','\u1ff5','\u1ff5','\u1ffd','\u203e','\u2041','\u2053','\u2055','\u2070', + '\u2072','\u207e','\u2080','\u208f','\u2095','\u20cf','\u20dd','\u20e0','\u20e2','\u20e4','\u20f1','\u2101', + '\u2103','\u2106','\u2108','\u2109','\u2114','\u2114','\u2116','\u2118','\u211e','\u2123','\u2125','\u2125', + '\u2127','\u2127','\u2129','\u2129','\u212e','\u212e','\u213a','\u213b','\u2140','\u2144','\u214a','\u214d', + '\u214f','\u2182','\u2185','\u2bff','\u2c2f','\u2c2f','\u2c5f','\u2c5f','\u2c70','\u2c70','\u2c7e','\u2c7f', + '\u2ce5','\u2cff','\u2d26','\u2d2f','\u2d66','\u2d6e','\u2d70','\u2d7f','\u2d97','\u2d9f','\u2da7','\u2da7', + '\u2daf','\u2daf','\u2db7','\u2db7','\u2dbf','\u2dbf','\u2dc7','\u2dc7','\u2dcf','\u2dcf','\u2dd7','\u2dd7', + '\u2ddf','\u2ddf','\u2e00','\u2e2e','\u2e30','\u3004','\u3007','\u3029','\u3030','\u3030','\u3036','\u303a', + '\u303d','\u3040','\u3097','\u3098','\u309b','\u309c','\u30a0','\u30a0','\u30fb','\u30fb','\u3100','\u3104', + '\u312e','\u3130','\u318f','\u319f','\u31b8','\u31ef','\u3200','\u33ff','\u4db6','\u4dff','\u9fc4','\u9fff', + '\ua48d','\ua4ff','\ua60d','\ua60f','\ua62c','\ua63f','\ua660','\ua661','\ua670','\ua67b','\ua67e','\ua67e', + '\ua698','\ua716','\ua720','\ua721','\ua789','\ua78a','\ua78d','\ua7fa','\ua823','\ua824','\ua827','\ua83f', + '\ua874','\ua881','\ua8b4','\ua8c3','\ua8c5','\ua8cf','\ua8da','\ua8ff','\ua92e','\ua92f','\ua952','\ua9ff', + '\uaa2f','\uaa30','\uaa33','\uaa34','\uaa37','\uaa3f','\uaa4d','\uaa4f','\uaa5a','\uabff','\ud7a4','\uf8ff', + '\ufa2e','\ufa2f','\ufa6b','\ufa6f','\ufada','\ufaff','\ufb07','\ufb12','\ufb18','\ufb1c','\ufb29','\ufb29', + '\ufb37','\ufb37','\ufb3d','\ufb3d','\ufb3f','\ufb3f','\ufb42','\ufb42','\ufb45','\ufb45','\ufbb2','\ufbd2', + '\ufd3e','\ufd4f','\ufd90','\ufd91','\ufdc8','\ufdef','\ufdfc','\ufdff','\ufe10','\ufe1f','\ufe27','\ufe32', + '\ufe35','\ufe4c','\ufe50','\ufe6f','\ufe75','\ufe75','\ufefd','\uff0f','\uff1a','\uff20','\uff3b','\uff3e', + 
'\uff40','\uff40','\uff5b','\uff65','\uffbf','\uffc1','\uffc8','\uffc9','\uffd0','\uffd1','\uffd8','\uffd9' + }; + private readonly TextReader _input; + private State _state; + + + public static CharSet AllCharactersExceptNull => CharRange((char)1, char.MaxValue); + + public static CharSet AllWhitespaceCharacters => new CharSet(false, '\u0009', '\u000d', '\u0020', '\u0020', '\u0085', '\u0085', '\u00a0', '\u00a0', + '\u1680', '\u1680', '\u180e', '\u180e', '\u2000', '\u200a', '\u2028', '\u2029', + '\u202f', '\u202f', '\u205f', '\u205f', '\u3000', '\u3000'); + + + public RegexLexer(TextReader input) + { + _input = input; + _state = State.Normal; + } + + private CharSet EscapedCharToAcceptCharRange(char c) + { + switch (c) + { + // A lot of these are REALLY funky numbers. Tibetan numbers and such. You name it + case 'd': + return new CharSet(false, escd); + // Shorthand for [^0-9] + case 'D': + return new CharSet(false, escD); + case 's': + return AllWhitespaceCharacters; + case 'S': + return AllCharactersExceptNull.Except(AllWhitespaceCharacters); + case 'w': + return new CharSet(false, escw); + case 'W': + return new CharSet(false, escW); + case 'n': + return SingleChar('\n', false); + case 'r': + return SingleChar('\r', false); + case 't': + return SingleChar('\t', false); + case '.': + case '*': + case '|': + case '[': + case ']': + case '+': + case '(': + case ')': + case '\\': + case '{': + case '}': + case ' ': + case '?': + return SingleChar(c, false); + default: + return new CharSet(); // Empty charset, might be added to + } + } + + private CharSet SingleChar(char c, bool ignoreCase) + { + CharSet cs = new CharSet(); + + if (ignoreCase) + { + char lower = char.ToLowerInvariant(c); + char upper = char.ToUpperInvariant(c); + + cs.Add(lower); + + if (lower != upper) + cs.Add(upper); + } + else + cs.Add(c); + + return cs; + } + + private CharSet EscapedCharToAcceptCharsInClass(char c) + { + // There are some additional escapeable characters for a character class + 
switch (c) + { + case '-': + case '^': + return SingleChar(c, false); + } + + return EscapedCharToAcceptCharRange(c); + } + + public RegexToken? NextToken(bool ignoreCase) + { + // These keeps track of classes + CharacterClassState classState = new CharacterClassState(); + NumberedRepetitionState numberedRepetitionState = new NumberedRepetitionState(); + + _state = State.Normal; + + while (_input.Peek() != -1) + { + char c = (char)_input.Read(); + + switch (_state) + { + case State.Normal: + switch (c) + { + case '\\': + _state = State.NormalEscaped; + + break; + case '[': + _state = State.BeginCharacterClass; + + break; + case '{': + _state = State.NumberedRepetition; + + break; + case '(': return new RegexToken { Type = RegexTokenType.OperatorOpenParanthesis }; + case ')': return new RegexToken { Type = RegexTokenType.OperatorCloseParanthesis }; + case '|': return new RegexToken { Type = RegexTokenType.OperatorOr }; + case '+': return new RegexToken { Type = RegexTokenType.OperatorPlus }; + case '*': return new RegexToken { Type = RegexTokenType.OperatorMul }; + case '?': return new RegexToken { Type = RegexTokenType.OperatorQuestion }; + case '.': return new RegexToken { Type = RegexTokenType.Accept, Characters = AllCharactersExceptNull }; + default: return new RegexToken { Type = RegexTokenType.Accept, Characters = SingleChar(c, ignoreCase) }; + } + + break; + case State.NormalEscaped: + { + CharSet characters = EscapedCharToAcceptCharRange(c); + + return characters.Any() ? 
new RegexToken { Characters = characters, Type = RegexTokenType.Accept } + : throw new LexerConstructionException($"Unknown escaped character '{c}'."); + } + case State.BeginCharacterClass: + switch (c) + { + case '^': + if (classState.Negated) + { + // If the classstate is ALREADY negated + // Readd the ^ to the expression + classState.LastChar = '^'; + _state = State.InsideCharacterClass; + } + + classState.Negated = true; + + break; + case '[': + case ']': + case '-': + // This does not break the character class TODO: I THINK!!! + classState.LastChar = c; + + break; + case '\\': + _state = State.InsideCharacterClassEscaped; + + break; + default: + classState.LastChar = c; + _state = State.InsideCharacterClass; + + break; + } + break; + case State.InsideCharacterClass: + switch (c) + { + case '-': + _state = State.RangeEnd; + + break; + case '[': + throw new LexerConstructionException("Opening new character class inside an already open one."); + case ']': + if (classState.LastChar != (char)0) + classState.CharsSet.Add(classState.LastChar); + + // Ending class + return new RegexToken + { + Type = RegexTokenType.Accept, + Characters = classState.Negated ? 
AllCharactersExceptNull.Except(classState.CharsSet) : classState.CharsSet + }; + case '\\': + _state = State.InsideCharacterClassEscaped; + + break; + default: + if (classState.LastChar != 0) + classState.CharsSet.Add(classState.LastChar); + + classState.LastChar = c; + + break; + } + + break; + case State.InsideCharacterClassEscaped: + { + CharSet characters = EscapedCharToAcceptCharsInClass(c); + + if (!characters.Any()) + throw new LexerConstructionException(string.Format("Unknown escaped character '{0}' in character class", c)); + + if (classState.LastChar != 0) + classState.CharsSet.Add(classState.LastChar); + + classState.CharsSet.UnionWith(characters); + classState.LastChar = (char)0; + _state = State.InsideCharacterClass; + } + + break; + case State.RangeEnd: + switch (c) + { + case ']': + // We found the - at the position BEFORE the end of the class + // which means we should handle it as a litteral and end the class + classState.CharsSet.Add(classState.LastChar); + classState.CharsSet.Add('-'); + + return new RegexToken + { + Type = RegexTokenType.Accept, + Characters = classState.Negated + ? AllCharactersExceptNull.Except(classState.CharsSet) + : classState.CharsSet + }; + default: + char lastClassChar = classState.LastChar; + char from = lastClassChar < c ? lastClassChar : c; + char to = lastClassChar < c ? c : lastClassChar; + + classState.CharsSet.AddRange(from, to); + classState.LastChar = (char)0; + _state = State.InsideCharacterClass; + + break; + } + + break; + case State.NumberedRepetition: + switch (c) + { + case '0': // Is it really OK to start with a 0. It is now. 
+ case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + numberedRepetitionState.Chars.Add(c); + + break; + case '}': + case ':': + case ',': + // Parse whatever is in Chars + int reps; + + // Number is required in FIRST part but OPTIONAL in the second + if (numberedRepetitionState.Chars.Any() || numberedRepetitionState.CurrentPart == 0) + { + if (!int.TryParse(new string(numberedRepetitionState.Chars.ToArray()), out reps)) + throw new LexerConstructionException("Numbered repetition operator contains operand that is not a number"); + } + else + { + // End up here when nothing specified in the last part. + // Use the max value to say that it can be infinite numbers. + reps = int.MaxValue; + } + + numberedRepetitionState.Chars.Clear(); + + // Set the right value + if (numberedRepetitionState.CurrentPart == 0) + numberedRepetitionState.MinRepetitions = reps; + else + numberedRepetitionState.MaxRepetitions = reps; + + if (c == ':' || c == ',') + { + ++numberedRepetitionState.CurrentPart; + + if (numberedRepetitionState.CurrentPart > 1) + throw new LexerConstructionException("More than one ',' in numbered repetition."); + } + else + return new RegexToken + { + Type = RegexTokenType.NumberedRepeat, + MinRepetitions = numberedRepetitionState.MinRepetitions, + MaxRepetitions = numberedRepetitionState.MaxRepetitions + }; + + break; + default: + throw new LexerConstructionException($"Illegal character {c} in numbered repetition"); + } + + break; + } + } + + // We get here if we try to lex when the expression has ended. 
+ return null; + } + + private static CharSet CharRange(char start, char end) + { + CharSet charRange = new CharSet(); + + charRange.AddRange(start, end); + + return charRange; + } + + private enum State + { + Normal, + NormalEscaped, + BeginCharacterClass, + InsideCharacterClass, + RangeEnd, + NumberedRepetition, + InsideCharacterClassEscaped + } + + private sealed class CharacterClassState + { + public CharSet CharsSet { get; private set; } + public bool Negated { get; set; } + public char LastChar { get; set; } + + + public CharacterClassState() => CharsSet = new CharSet(); + } + + private sealed class NumberedRepetitionState + { + public int MaxRepetitions { get; set; } + public int MinRepetitions { get; set; } + public List Chars { get; private set; } + public int CurrentPart { get; set; } + + + public NumberedRepetitionState() + { + MinRepetitions = -1; + MaxRepetitions = -1; + Chars = new List(); + } + } + } +} \ No newline at end of file diff --git a/Piglet/Lexer/Construction/RegExToken.cs b/Piglet/Lexer/Construction/RegExToken.cs index 33aad8e..f915807 100644 --- a/Piglet/Lexer/Construction/RegExToken.cs +++ b/Piglet/Lexer/Construction/RegExToken.cs @@ -1,45 +1,37 @@ -using System; using System.Collections.Generic; -using System.Linq; namespace Piglet.Lexer.Construction { - internal class RegExToken - { - internal enum TokenType + internal sealed class RegexToken + { + private static readonly Dictionary _precedences = new Dictionary { - OperatorOr, - OperatorPlus, - OperatorMul, - OperatorQuestion, - Accept, - NumberedRepeat, - OperatorOpenParanthesis, - OperatorCloseParanthesis, - OperatorConcat - } - - private static readonly Dictionary precedences = new Dictionary { - {TokenType.OperatorPlus, 3}, - {TokenType.OperatorMul, 3}, - {TokenType.OperatorQuestion, 3}, - {TokenType.NumberedRepeat, 3}, - {TokenType.OperatorConcat, 2}, - {TokenType.OperatorOr, 1}, - {TokenType.OperatorOpenParanthesis, 0} - }; - - public TokenType Type { get; set; } + 
[RegexTokenType.OperatorPlus] = 3, + [RegexTokenType.OperatorMul] = 3, + [RegexTokenType.OperatorQuestion] = 3, + [RegexTokenType.NumberedRepeat] = 3, + [RegexTokenType.OperatorConcat] = 2, + [RegexTokenType.OperatorOr] = 1, + [RegexTokenType.OperatorOpenParanthesis] = 0, + }; + public RegexTokenType Type { get; set; } public CharSet Characters { get; set; } - public int MinRepetitions { get; set; } - public int MaxRepetitions { get; set; } + public int Precedence => _precedences[Type]; + } - public int Precedence - { - get { return precedences[Type]; } - } + internal enum RegexTokenType + { + OperatorOr, + OperatorPlus, + OperatorMul, + OperatorQuestion, + Accept, + NumberedRepeat, + OperatorOpenParanthesis, + OperatorCloseParanthesis, + OperatorConcat } } \ No newline at end of file diff --git a/Piglet/Lexer/Construction/ShuntingYard.cs b/Piglet/Lexer/Construction/ShuntingYard.cs index 40df2d4..11a7041 100644 --- a/Piglet/Lexer/Construction/ShuntingYard.cs +++ b/Piglet/Lexer/Construction/ShuntingYard.cs @@ -1,113 +1,111 @@ -using System.Collections.Generic; -using System.Linq; - -namespace Piglet.Lexer.Construction -{ - internal class ShuntingYard - { - private readonly RegExLexer lexer; - - public ShuntingYard(RegExLexer lexer) - { - this.lexer = lexer; - } - - - private IEnumerable TokensWithImplicitConcat() - { - RegExToken lastToken = null; - - for (var token = lexer.NextToken(); token != null; ) - { - // If the last token was accept and this new token is also accept we need to insert a concat operator - // between the two. 
- if (lastToken != null && - PreceedingTypeRequiresConcat(lastToken.Type) && - NextTypeRequiresConcat(token.Type)) - { - yield return new RegExToken { Type = RegExToken.TokenType.OperatorConcat }; - } - - yield return token; - - lastToken = token; - token = lexer.NextToken(); - } - } - - private bool PreceedingTypeRequiresConcat(RegExToken.TokenType type) - { - switch (type) - { - case RegExToken.TokenType.OperatorMul: - case RegExToken.TokenType.OperatorQuestion: - case RegExToken.TokenType.OperatorPlus: - case RegExToken.TokenType.Accept: - case RegExToken.TokenType.OperatorCloseParanthesis: - case RegExToken.TokenType.NumberedRepeat: - return true; - } - return false; - } - - private bool NextTypeRequiresConcat(RegExToken.TokenType type) - { - switch (type) - { - case RegExToken.TokenType.Accept: - case RegExToken.TokenType.OperatorOpenParanthesis: - return true; - - } - return false; - } - - public IEnumerable ShuntedTokens() - { - var operatorStack = new Stack(); - - foreach (var token in TokensWithImplicitConcat()) - { - switch (token.Type) - { - case RegExToken.TokenType.Accept: - yield return token; - break; - - case RegExToken.TokenType.OperatorOpenParanthesis: - operatorStack.Push(token); - break; - - case RegExToken.TokenType.OperatorCloseParanthesis: - while (operatorStack.Any() && operatorStack.Peek().Type != RegExToken.TokenType.OperatorOpenParanthesis) - { - yield return operatorStack.Pop(); - } - if (!operatorStack.Any()) - // Mismatched parenthesis - throw new LexerConstructionException("Mismatched parenthesis in regular expression"); - operatorStack.Pop(); - break; - - default: - while (operatorStack.Any() && token.Precedence <= operatorStack.Peek().Precedence) - { - yield return operatorStack.Pop(); - } - operatorStack.Push(token); - break; - } - } - - while (operatorStack.Any()) - { - var op = operatorStack.Pop(); - - if (op.Type == RegExToken.TokenType.OperatorOpenParanthesis) - throw new LexerConstructionException("Mismatched parenthesis in 
regular expression"); - - yield return op; - } - } - } -} +using System.Collections.Generic; +using System.Linq; + +namespace Piglet.Lexer.Construction +{ + internal sealed class ShuntingYard + { + private readonly RegexLexer _lexer; + private readonly bool _ignorecase; + + + public ShuntingYard(RegexLexer lexer, bool ignorecase) + { + _lexer = lexer; + _ignorecase = ignorecase; + } + + private IEnumerable TokensWithImplicitConcat() + { + RegexToken? lastToken = null; + + for (RegexToken? token = _lexer.NextToken(_ignorecase); token is { };) + { + // If the last token was accept and this new token is also accept we need to insert a concat operator between the two. + if (lastToken != null && PreceedingTypeRequiresConcat(lastToken.Type) && NextTypeRequiresConcat(token.Type)) + yield return new RegexToken { Type = RegexTokenType.OperatorConcat }; + + yield return token; + + lastToken = token; + token = _lexer.NextToken(_ignorecase); + } + } + + private static bool PreceedingTypeRequiresConcat(RegexTokenType type) + { + switch (type) + { + case RegexTokenType.OperatorMul: + case RegexTokenType.OperatorQuestion: + case RegexTokenType.OperatorPlus: + case RegexTokenType.Accept: + case RegexTokenType.OperatorCloseParanthesis: + case RegexTokenType.NumberedRepeat: + return true; + } + + return false; + } + + private static bool NextTypeRequiresConcat(RegexTokenType type) + { + switch (type) + { + case RegexTokenType.Accept: + case RegexTokenType.OperatorOpenParanthesis: + return true; + } + + return false; + } + + public IEnumerable ShuntedTokens() + { + Stack operatorStack = new Stack(); + + foreach (RegexToken token in TokensWithImplicitConcat()) + { + switch (token.Type) + { + case RegexTokenType.Accept: + yield return token; + + break; + case RegexTokenType.OperatorOpenParanthesis: + operatorStack.Push(token); + + break; + case RegexTokenType.OperatorCloseParanthesis: + while (operatorStack.Any() && operatorStack.Peek().Type != RegexTokenType.OperatorOpenParanthesis) + 
yield return operatorStack.Pop(); + + if (!operatorStack.Any()) + // Mismatched parenthesis + throw new LexerConstructionException("Mismatched parenthesis in regular expression"); + + operatorStack.Pop(); + + break; + default: + while (operatorStack.Any() && token.Precedence <= operatorStack.Peek().Precedence) + yield return operatorStack.Pop(); + + operatorStack.Push(token); + + break; + } + } + + while (operatorStack.Any()) + { + RegexToken op = operatorStack.Pop(); + + if (op.Type == RegexTokenType.OperatorOpenParanthesis) + throw new LexerConstructionException("Mismatched parenthesis in regular expression"); + + yield return op; + } + } + } +} \ No newline at end of file diff --git a/Piglet/Lexer/Construction/Transition.cs b/Piglet/Lexer/Construction/Transition.cs index 85e0d7a..f0b32d9 100644 --- a/Piglet/Lexer/Construction/Transition.cs +++ b/Piglet/Lexer/Construction/Transition.cs @@ -1,24 +1,25 @@ -namespace Piglet.Lexer.Construction +using System.Diagnostics.CodeAnalysis; + +namespace Piglet.Lexer.Construction { internal class Transition { + [MaybeNull] public TState From { get; set; } + [MaybeNull] public TState To { get; set; } // An empty set of valid input means that this is an Epsilon transition. Epsilon transitions // are only valid in NFAs public CharSet ValidInput { get; internal set; } - public Transition(TState from, TState to, CharSet validInput = null) + public Transition(TState from, TState to, CharSet? validInput = null) { - ValidInput = validInput??new CharSet(); + ValidInput = validInput ?? 
new CharSet(); From = from; To = to; } - public override string ToString() - { - return string.Format("{0} ={1}=> {2}", From, To, ValidInput); - } + public override string ToString() => string.Format("{0} ={1}=> {2}", From, To, ValidInput); } } diff --git a/Piglet/Lexer/Construction/TriangularTable.cs b/Piglet/Lexer/Construction/TriangularTable.cs index f6e4686..7e174ab 100644 --- a/Piglet/Lexer/Construction/TriangularTable.cs +++ b/Piglet/Lexer/Construction/TriangularTable.cs @@ -1,60 +1,62 @@ using System; -using System.Text; namespace Piglet.Lexer.Construction { - internal class TriangularTable + internal sealed class TriangularTable { // Space-inefficient implementation, we never use half the table. // Figure speed is more important than space these days - private readonly TIndexType[,] table; - private readonly Func objIndexFunc; - private readonly int tableSize; + private readonly TIndexType[,] _table; + private readonly Func _objIndexFunc; + private readonly int _tableSize; + public TriangularTable(int tableSize, Func objIndexFunc) { - this.tableSize = tableSize; - table = new TIndexType[tableSize,tableSize]; - this.objIndexFunc = objIndexFunc; + _tableSize = tableSize; + _table = new TIndexType[tableSize,tableSize]; + _objIndexFunc = objIndexFunc; } public void Fill(TIndexType value) { - for (int i = 0; i < tableSize; ++i) - for (int j = 0; j < tableSize; ++j ) - table[i, j] = value; + for (int i = 0; i < _tableSize; ++i) + for (int j = 0; j < _tableSize; ++j ) + _table[i, j] = value; } public TIndexType this[TObjectType a, TObjectType b] { get { - int ia = objIndexFunc(a); - int ib = objIndexFunc(b); + int ia = _objIndexFunc(a); + int ib = _objIndexFunc(b); // ia must contain the larger of the two if (ia < ib) { - var t = ia; + int t = ia; + ia = ib; ib = t; } - return table[ia, ib]; - } + return _table[ia, ib]; + } set { - int ia = objIndexFunc(a); - int ib = objIndexFunc(b); + int ia = _objIndexFunc(a); + int ib = _objIndexFunc(b); // ia must contain the 
larger of the two if (ia < ib) { - var t = ia; + int t = ia; ia = ib; ib = t; } - table[ia, ib] = value; + + _table[ia, ib] = value; } } } diff --git a/Piglet/Lexer/ILexer.cs b/Piglet/Lexer/ILexer.cs index c518fac..f1d3b65 100644 --- a/Piglet/Lexer/ILexer.cs +++ b/Piglet/Lexer/ILexer.cs @@ -1,34 +1,34 @@ -using System; -using System.Collections; -using System.Collections.Generic; -using System.IO; - -namespace Piglet.Lexer -{ - /// - /// A lexer that tokenizes input into tuples of tokennumber and semantic value. Lexers are not thread safe, but they are reentrant. You - /// can reuse the same lexer by setting a new character source. - /// - /// The semantic value type - public interface ILexer - { - /// - /// Begin lexing a text - /// - /// TextReader to read from - ILexerInstance Begin(TextReader reader); - - /// - /// Begin lexing a string. This method is the same as writing Begin(new StringReader(source)) - /// - /// Source string to read from - ILexerInstance Begin(string source); - - /// - /// Tokenize a string - /// - /// Input string to tokenize - /// - IEnumerable> Tokenize(string source); - } +using System.Collections.Generic; +using System.IO; + +using Piglet.Lexer.Runtime; + +namespace Piglet.Lexer +{ + /// + /// A lexer that tokenizes input into tuples of tokennumber and semantic value. Lexers are not thread safe, but they are reentrant. You + /// can reuse the same lexer by setting a new character source. + /// + /// The semantic value type + public interface ILexer + { + /// + /// Begin lexing a text + /// + /// TextReader to read from + ILexerInstance Begin(TextReader reader); + + /// + /// Begin lexing a string. 
This method is the same as writing Begin(new StringReader(source)) + /// + /// Source string to read from + ILexerInstance Begin(string source); + + /// + /// Tokenize a string + /// + /// Input string to tokenize + /// + IEnumerable<(int number, LexedToken token)> Tokenize(string source); + } } \ No newline at end of file diff --git a/Piglet/Lexer/ILexerInstance.cs b/Piglet/Lexer/ILexerInstance.cs index 10fb243..f2d4e51 100644 --- a/Piglet/Lexer/ILexerInstance.cs +++ b/Piglet/Lexer/ILexerInstance.cs @@ -1,20 +1,21 @@ -using System; - -namespace Piglet.Lexer -{ - /// - /// A running instance of a lexer containing the lexer state - /// - /// Return type of the lexer tokens - public interface ILexerInstance : ILexerState - { - /// - /// Gets the next token from the input stream. - /// - /// A tuple where firstitem is token number, and second item is the tokens semantic value. If the - /// end of input is reached the lexer will return the configuration given end of input token number and default(T) as the - /// semantic value - /// LexerException if illegal characters are detected - Tuple Next(); - } +using Piglet.Lexer.Runtime; + +namespace Piglet.Lexer +{ + /// + /// A running instance of a lexer containing the lexer state + /// + /// Return type of the lexer tokens + public interface ILexerInstance + : ILexerState + { + /// + /// Gets the next token from the input stream. + /// + /// A tuple where firstitem is token number, and second item is the tokens semantic value. 
If the + /// end of input is reached the lexer will return the configuration given end of input token number and default(T) as the + /// semantic value + /// LexerException if illegal characters are detected + (int number, LexedToken token) Next(); + } } \ No newline at end of file diff --git a/Piglet/Lexer/ILexerState.cs b/Piglet/Lexer/ILexerState.cs index dd25b91..b68de1f 100644 --- a/Piglet/Lexer/ILexerState.cs +++ b/Piglet/Lexer/ILexerState.cs @@ -6,10 +6,20 @@ namespace Piglet.Lexer public interface ILexerState { /// - /// The current line number in the input text + /// The current line number in the input text (one-based). /// int CurrentLineNumber { get; } + /// + /// The current character index in the input text (zero-based). + /// + int CurrentAbsoluteIndex { get; } + + /// + /// The current character index inside the current line (zero-based). + /// + int CurrentCharacterIndex { get; } + /// /// The contents so far of the current line /// diff --git a/Piglet/Lexer/LexerException.cs b/Piglet/Lexer/LexerException.cs index 6fe47f4..4dfce6d 100644 --- a/Piglet/Lexer/LexerException.cs +++ b/Piglet/Lexer/LexerException.cs @@ -1,29 +1,56 @@ -using System; - -namespace Piglet.Lexer -{ - /// - /// LexerExceptions are thrown when the lexer cannot make sense of the current input. - /// - public class LexerException : Exception - { - /// - /// The current line number of the document the lexer is scanning. - /// - public int LineNumber { get; internal set; } - - /// - /// The contents of the current line so far of the current document - /// - public string LineContents { get; internal set; } - - /// - /// Construct a new LexerException - /// - /// Message to display - public LexerException(string message) - : base(message) - { - } - } +using System; + +namespace Piglet.Lexer +{ + /// + /// LexerExceptions are thrown when the lexer cannot make sense of the current input. 
+ /// + public sealed class LexerException + : Exception + { + /// + /// The current line number of the document the lexer is scanning. + /// + public int LineNumber { get; } + + /// + /// The contents of the current line so far of the current document. + /// + public string LineContents { get; } + + /// + /// The current character index inside the current line (zero-based). + /// + public int CharacterIndex { get; } + + /// + /// The current character index in the input text (zero-based). + /// + public int CurrentAbsoluteIndex { get; } + + /// + /// The lexed input string. + /// + public string Input { get; } + + + /// + /// Construct a new LexerException + /// + /// Message to display + /// The current line number of the document the lexer is scanning. + /// The contents of the current line so far of the current document. + /// The current character index inside the current line (zero-based). + /// The current character index in the input text (zero-based). + /// The lexed input string. 
+ internal LexerException(string message, int lineNumber, string lineContents, int characterIndex, int currentAbsoluteIndex, string input) + : base(message) + { + LineNumber = lineNumber; + LineContents = lineContents; + CharacterIndex = characterIndex; + CurrentAbsoluteIndex = currentAbsoluteIndex; + Input = input; + } + } } \ No newline at end of file diff --git a/Piglet/Lexer/LexerFactory.cs b/Piglet/Lexer/LexerFactory.cs index 0137bbd..673bf34 100644 --- a/Piglet/Lexer/LexerFactory.cs +++ b/Piglet/Lexer/LexerFactory.cs @@ -1,8 +1,10 @@ -using System; +using System.Collections.Generic; +using System.Linq; +using System; + using Piglet.Lexer.Configuration; using Piglet.Parser.Configuration; using Piglet.Parser.Construction; -using System.Linq; namespace Piglet.Lexer { @@ -20,8 +22,10 @@ public static class LexerFactory /// LexerConfigurationException for errors public static ILexer Configure(Action> configureAction) { - var lexerConfigurator = new LexerConfigurator(); + LexerConfigurator lexerConfigurator = new LexerConfigurator(); + configureAction(lexerConfigurator); + return lexerConfigurator.CreateLexer(); } @@ -31,7 +35,7 @@ public static ILexer Configure(Action> configureAction) /// Grammar to generate lexers from /// Additional lexing settings /// A lexer compatibe with the given grammars tokenizing rules - internal static ILexer ConfigureFromGrammar(IGrammar grammar, ILexerSettings lexerSettings) + internal static ILexer ConfigureFromGrammar(IGrammar grammar, ILexerSettings lexerSettings) => Configure(c => { // This works because the grammar tokens will recieve the same token number // since they are assigned to this list in just the same way. AND BECAUSE the @@ -39,25 +43,20 @@ internal static ILexer ConfigureFromGrammar(IGrammar grammar, ILexerSettin // This might be considered dodgy later on, since it makes it kinda sorta hard to // use other lexers with Piglet. Let's see what happens, if anyone ever wants to write their // own lexer for Piglet. 
- return Configure(c => - { - c.Runtime = lexerSettings.Runtime; - - var terminals = grammar.AllSymbols.OfType>().ToList(); - foreach (var terminal in terminals) - { - if (terminal.RegExp != null) - { - c.Token(terminal.RegExp, terminal.OnParse); - } - } - c.EndOfInputTokenNumber = terminals.FindIndex(f => f == grammar.EndOfInputTerminal); - - foreach (var ignored in lexerSettings.Ignore) - { - c.Ignore(ignored); - } - }); - } + + c.Runtime = lexerSettings.Runtime; + c.IgnoreCase = lexerSettings.IgnoreCase; + + List> terminals = grammar.AllSymbols.OfType>().ToList(); + + foreach (ITerminal terminal in terminals) + if (terminal.Regex is { } r) + c.Token(r, terminal.OnParse); + + c.EndOfInputTokenNumber = terminals.FindIndex(f => f == grammar.EndOfInputTerminal); + + foreach (string ignored in lexerSettings.Ignore) + c.Ignore(ignored); + }); } } diff --git a/Piglet/Lexer/Runtime/DfaLexer.cs b/Piglet/Lexer/Runtime/DfaLexer.cs index 4a7bd49..2de5074 100644 --- a/Piglet/Lexer/Runtime/DfaLexer.cs +++ b/Piglet/Lexer/Runtime/DfaLexer.cs @@ -1,65 +1,50 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using Piglet.Lexer.Construction; - -namespace Piglet.Lexer.Runtime -{ - internal class DfaLexer : LexerBase - { - private readonly DFA dfa; - private readonly Dictionary>> actions; - - public DfaLexer(DFA dfa, IList nfas, List>> tokens, int endOfInputTokenNumber) - : base(endOfInputTokenNumber) - { - this.dfa = dfa; - - actions = new Dictionary>>(); - - // Calculate which DFA state corresponds to each action - foreach (var dfaState in dfa.States) - { - var acceptingNfaStates = dfaState.NfaStates.Where(a => a.AcceptState).ToArray(); - if (acceptingNfaStates.Any()) - { - for (int i = 0; i < nfas.Count; ++i) - { - if (nfas[i].States.Intersect(acceptingNfaStates).Any()) - { - // This matches, we will store the action in the dictionary - actions.Add(dfaState, - i >= tokens.Count - ? 
new Tuple>(int.MinValue, null) - : new Tuple>(i, tokens[i].Item2)); - break; - } - } - } - } - } - - protected override Tuple> GetAction(DFA.State state) - { - return actions.ContainsKey(state) ? actions[state] : null; - } - - protected override bool ReachedTermination(DFA.State nextState) - { - return nextState == null; - } - - protected override DFA.State GetNextState(DFA.State state, char input) - { - return dfa.Transitions - .Where(f => f.From == state && f.ValidInput.Ranges.Any(r => r.From <= input && r.To >= input)) - .Select(f => f.To) - .SingleOrDefault(); - } - - protected override DFA.State GetInitialState() - { - return dfa.StartState; - } - } +using System.Collections.Generic; +using System.Linq; +using System; + +using Piglet.Lexer.Construction; + +namespace Piglet.Lexer.Runtime +{ + internal sealed class DfaLexer + : LexerBase + { + private readonly DFA _dfa; + private readonly Dictionary? action)> _actions; + + + public DfaLexer(DFA dfa, IList nfas, List<(string regex, Func action)> tokens, int endOfInputTokenNumber) + : base(endOfInputTokenNumber) + { + _dfa = dfa; + _actions = new Dictionary? action)>(); + + // Calculate which DFA state corresponds to each action + foreach (DFA.State dfaState in dfa.States) + { + NFA.State[] acceptingNfaStates = dfaState.NfaStates.Where(a => a.AcceptState).ToArray(); + + if (acceptingNfaStates.Any()) + for (int i = 0; i < nfas.Count; ++i) + if (nfas[i].States.Intersect(acceptingNfaStates).Any()) + { + // This matches, we will store the action in the dictionary + _actions[dfaState] = i >= tokens.Count ? (int.MinValue, null) : (i, tokens[i].action); + + break; + } + } + } + + protected override (int number, Func? action)? GetAction(DFA.State state) => _actions.ContainsKey(state) ? 
_actions?[state] : null; + + protected override bool ReachedTermination(DFA.State nextState) => nextState is null; + + protected override DFA.State GetNextState(DFA.State state, char input) => (from t in _dfa.Transitions + where t.From == state + where t.ValidInput.Ranges.Any(r => r.From <= input && r.To >= input) + select t.To).SingleOrDefault(); + + protected override DFA.State? GetInitialState() => _dfa.StartState; + } } \ No newline at end of file diff --git a/Piglet/Lexer/Runtime/LexedToken.cs b/Piglet/Lexer/Runtime/LexedToken.cs new file mode 100644 index 0000000..01421e4 --- /dev/null +++ b/Piglet/Lexer/Runtime/LexedToken.cs @@ -0,0 +1,124 @@ +using System.Collections.Generic; +using System.Linq; + +using Piglet.Parser.Configuration; + +namespace Piglet.Lexer.Runtime +{ + /// + /// Represents an abstract lexed (accepted) token. + /// + public abstract class LexedTokenBase + { + /// + /// Returns the string associated with the lexed symbol. + /// + public string? LexedString { get; } + /// + /// The token's absolute index in the input string (zero-based). + /// + public int AbsoluteIndex { get; } + /// + /// The token's starting line number (one-based). + /// + public int StartLineNumber { get; } + /// + /// The token's starting index inside the starting line (zero-based). + /// + public int StartCharacterIndex { get; } + /// + /// The token's length (in characters). + /// + public int Length { get; } + /// + /// Determines whether the token is a terminal token. + /// + public virtual bool IsTerminal { get; } = false; +#if DEBUG + /// + /// The debug name of the current token. + /// + public virtual string? DebugName => LexedString; +#endif + + private protected LexedTokenBase(int abs_index, int line, int char_index, int length) + : this(null, abs_index, line, char_index) => Length = length; + + private protected LexedTokenBase(string? str, int abs_index, int line, int char_index) + { + LexedString = str; + Length = str?.Length ?? 
0; + AbsoluteIndex = abs_index; + StartLineNumber = line; + StartCharacterIndex = char_index; + } + } + + /// + /// Represents a lexed (accepted) token. + /// + /// The semantic value stored inside the lexed symbol. + public class LexedToken + : LexedTokenBase + { + /// + /// Returns the lexed symbol. + /// + public T SymbolValue { get; } + + + internal LexedToken(T value, string? str, int abs_index, int line, int char_index) + : base(str, abs_index, line, char_index) => SymbolValue = value; + + internal LexedToken(T value, int abs_index, int line, int char_index, int length) + : base(abs_index, line, char_index, length) => SymbolValue = value; + + // public LexedToken Cast() => this is LexedNonTerminal nt ? nt.Cast() : new LexedToken((U)(object)SymbolValue, LexedString, AbsoluteIndex, StartLineNumber, StartCharacterIndex, true); + + /// + public override string ToString() => $"[{AbsoluteIndex}..{AbsoluteIndex + Length}] \"{LexedString}\" at ({StartLineNumber}:{StartCharacterIndex})"; + } + + /// + /// Represents a lexed non-terminal token. + /// + /// The semantic value stored inside the lexed symbol. + public sealed class LexedNonTerminal + : LexedToken + { + internal INonTerminal NonTerminal { get; } + /// + /// The + /// + public LexedToken[] ChildNodes { get; } + public LexedToken FirstChild => ChildNodes[0]; + public LexedToken LastChild => ChildNodes[^1]; + /// + public override bool IsTerminal => true; +#if DEBUG + /// + public override string? 
DebugName => NonTerminal.DebugName; +#endif + + private LexedNonTerminal(T value, INonTerminal symbol, LexedToken[] ordered_children) + : base( + value, + ordered_children[0].AbsoluteIndex, + ordered_children[0].StartLineNumber, + ordered_children[0].StartCharacterIndex, + ordered_children[^1].AbsoluteIndex - ordered_children[0].AbsoluteIndex + ordered_children[^1].Length + ) + { + NonTerminal = symbol; + ChildNodes = ordered_children; + } + + internal LexedNonTerminal(T value, INonTerminal symbol, IEnumerable> children) + : this(value, symbol, children.OrderBy(c => c.AbsoluteIndex).ToArray() is { } arr && arr.Length > 0 ? arr : new[] { new LexedToken(default!, 0, 0, 0, 0) }) => NonTerminal = symbol; + + // public new LexedNonTerminal Cast() => new LexedNonTerminal((U)(object)SymbolValue, NonTerminal, ChildNodes as IEnumerable>); + + /// + public override string ToString() => $"[{AbsoluteIndex}..{AbsoluteIndex + Length}] \"{NonTerminal.DebugName}\" : {SymbolValue}"; + } +} diff --git a/Piglet/Lexer/Runtime/LexerBase.cs b/Piglet/Lexer/Runtime/LexerBase.cs index 5d8f98c..da83fb6 100644 --- a/Piglet/Lexer/Runtime/LexerBase.cs +++ b/Piglet/Lexer/Runtime/LexerBase.cs @@ -1,154 +1,151 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Text; - -namespace Piglet.Lexer.Runtime -{ - internal abstract class LexerBase : ILexer - { - private readonly int endOfInputTokenNumber; - - protected LexerBase(int endOfInputTokenNumber) - { - this.endOfInputTokenNumber = endOfInputTokenNumber; - } - - private class LexerStateImpl : ILexerInstance - { - private readonly LexerBase lexer; - private readonly StringBuilder currentLine = new StringBuilder(); - private readonly StringBuilder lexeme = new StringBuilder(); - private readonly TextReader source; - - private int lineNumber = 1; - private TState state; - - public LexerStateImpl(TextReader source, LexerBase lexer) - { - this.lexer = lexer; - this.source = source; - } - - public int CurrentLineNumber { 
get { return lineNumber; } } - public string CurrentLine { get { return currentLine.ToString(); } } - public string LastLexeme { get { return lexeme.ToString(); } } - - public Tuple Next() - { - state = lexer.GetInitialState(); - - lexeme.Clear(); - - while (true) - { - int peek = source.Peek(); - - // Replace EOF with 0, or we will read outside of the table. - if (peek == -1) - { - // If reading the end of file and the lexeme is empty, return end of stream token - // If the lexeme isn't empty, it must try to find out whatever it is in the lexeme. - if (lexeme.Length == 0) - { - return new Tuple(lexer.endOfInputTokenNumber, default(T)); - } - peek = 0; - } - - var c = (char)peek; - TState nextState = lexer.GetNextState(state, c); - var reachedTermination = lexer.ReachedTermination(nextState); - - if (reachedTermination) - { - // We have reached termination - // Two possibilities, current state accepts, if so return token ID - // else there is an error - var action = lexer.GetAction(state); - if (action != null && lexeme.Length > 0) - { - // If tokennumber is int.MinValue it is an ignored token, like typically whitespace. - // In that case, dont return, continue lexing with the reset parser to get the next token. - if (action.Item1 == int.MinValue) - { - // Reset state - state = lexer.GetInitialState(); - - // Clear lexeme - lexeme.Clear(); - } - else - { - // Token completed. Return it - return new Tuple(action.Item1, - action.Item2 == null ? default(T) : action.Item2(lexeme.ToString())); - } - } - else - { - // We get here if there is no action at the state where the lexer cannot continue given the input. - // This is fail. - var lexerException = - new LexerException(string.Format("Invalid character '{0}'", - c == '\0' ? "NULL" : c.ToString())) - { - LineContents = currentLine.ToString(), - LineNumber = lineNumber - }; - - throw lexerException; - } - } - else - { - // Peek is still last char. 
If we are going to be switching lines - // add to the line number and clear the current line buffer - if (c == '\n') - { - lineNumber++; - currentLine.Clear(); - } - else - { - currentLine.Append(c); - } - - // Machine has not terminated. - // Switch states, append character to lexeme. - state = nextState; - lexeme.Append(c); - source.Read(); - } - } - } - } - - public ILexerInstance Begin(TextReader reader) - { - return new LexerStateImpl(reader, this); - } - - public ILexerInstance Begin(string source) - { - return Begin(new StringReader(source)); - } - - public IEnumerable> Tokenize(string source) - { - var instance = Begin(source); - for (var token = instance.Next(); token.Item1 != -1; token = instance.Next()) - { - yield return token; - } - } - - protected abstract Tuple> GetAction(TState state); - - protected abstract bool ReachedTermination(TState nextState); - - protected abstract TState GetNextState(TState state, char input); - - protected abstract TState GetInitialState(); - } -} +using System.Collections.Generic; +using System.Text; +using System.IO; +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Piglet.Lexer.Runtime +{ + internal abstract class LexerBase + : ILexer + { + private readonly int _endOfInputTokenNumber; + + + protected LexerBase(int endOfInputTokenNumber) => _endOfInputTokenNumber = endOfInputTokenNumber; + + public ILexerInstance Begin(TextReader reader) => new LexerStateImpl(reader, this); + + public ILexerInstance Begin(string source) => Begin(new StringReader(source)); + + public IEnumerable<(int number, LexedToken token)> Tokenize(string source) + { + ILexerInstance instance = Begin(source); + + for ((int number, LexedToken token) token = instance.Next(); token.number != -1; token = instance.Next()) + yield return token; + } + + protected abstract (int number, Func? action)? 
GetAction(TState state); + + protected abstract bool ReachedTermination(TState nextState); + + protected abstract TState GetNextState(TState state, char input); + + [return: MaybeNull] + protected abstract TState GetInitialState(); + + + private sealed class LexerStateImpl + : ILexerInstance + { + private readonly LexerBase _lexer; + private readonly StringBuilder _currentLine = new StringBuilder(); + private readonly StringBuilder _lexeme = new StringBuilder(); + private readonly TextReader _source; + [MaybeNull] + private TState _state; + + + public int CurrentLineNumber { get; private set; } = 1; + public int CurrentAbsoluteIndex { get; private set; } = 0; + public int CurrentCharacterIndex { get; private set; } + public string CurrentLine => _currentLine.ToString(); + public string LastLexeme => _lexeme.ToString(); + + + public LexerStateImpl(TextReader source, LexerBase lexer) + { + _lexer = lexer; + _source = source; + CurrentAbsoluteIndex = 0; + CurrentCharacterIndex = 0; + } + + public (int number, LexedToken token) Next() + { + _state = _lexer.GetInitialState(); + _lexeme.Clear(); + + while (true) + { + int peek = _source.Peek(); + + // Replace EOF with 0, or we will read outside of the table. + if (peek == -1) + { + // If reading the end of file and the lexeme is empty, return end of stream token + // If the lexeme isn't empty, it must try to find out whatever it is in the lexeme. + if (_lexeme.Length == 0) + return (_lexer._endOfInputTokenNumber, new LexedToken(default, "", CurrentAbsoluteIndex, CurrentLineNumber, CurrentCharacterIndex)); + + peek = 0; + } + + char c = (char)peek; + TState nextState = _lexer.GetNextState(_state, c); + bool reachedTermination = _lexer.ReachedTermination(nextState); + + if (reachedTermination) + { + // We have reached termination. 
+ // Two possibilities: current state accepts, if so return token ID otherwise there is an error + if (_lexer.GetAction(_state) is { } t && _lexeme.Length > 0) + { + // If tokennumber is int.MinValue it is an ignored token, like typically whitespace. + // In that case, dont return, continue lexing with the reset parser to get the next token. + if (t.number == int.MinValue) + { + // Reset state + _state = _lexer.GetInitialState(); + // Clear lexeme + _lexeme.Clear(); + } + else + { + string str = _lexeme.ToString(); + T value = t.action is null ? default : t.action(str); + LexedToken lx = new LexedToken(value, str, CurrentAbsoluteIndex - str.Length, CurrentLineNumber, CurrentCharacterIndex - str.Length); + + return (t.number, lx); // Token completed. Return it + } + } + else + { + string input = c == '\0' ? "NULL" : c.ToString(); + + // We get here if there is no action at the state where the lexer cannot continue given the input. This fails. + throw new LexerException( + $"Unexpected character '{input}' in '{_currentLine.ToString().TrimStart()}{c} ...' at ({CurrentLineNumber}:{CurrentCharacterIndex})", + lineNumber: CurrentLineNumber, + lineContents: CurrentLine, + characterIndex: CurrentCharacterIndex, + currentAbsoluteIndex: CurrentAbsoluteIndex, + input: input + ); + } + } + else + { + // Peek is still last char. If we are going to be switching lines add to the line number and clear the current line buffer + if (c == '\n') + { + CurrentLineNumber++; + CurrentCharacterIndex = 0; + _currentLine.Clear(); + } + else + _currentLine.Append(c); + + // Machine has not terminated. Switch states, append character to lexeme. 
+ CurrentCharacterIndex++; + CurrentAbsoluteIndex++; + _state = nextState; + _lexeme.Append(c); + _source.Read(); + } + } + } + } + } +} diff --git a/Piglet/Lexer/Runtime/NfaLexer.cs b/Piglet/Lexer/Runtime/NfaLexer.cs index 9093fed..5300ca3 100644 --- a/Piglet/Lexer/Runtime/NfaLexer.cs +++ b/Piglet/Lexer/Runtime/NfaLexer.cs @@ -1,57 +1,69 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using Piglet.Lexer.Construction; - -namespace Piglet.Lexer.Runtime -{ - internal class NfaLexer : LexerBase> - { - private readonly NFA nfa; - private readonly Tuple>>[] actions; - - public NfaLexer(NFA nfa, IEnumerable nfas, List>> tokens, int endOfInputTokenNumber) - : base(endOfInputTokenNumber) - { - this.nfa = nfa; - actions = nfas.Select((n, i) => new Tuple>>(n.States.Single(f => f.AcceptState), new Tuple>( i, - i < tokens.Count ? tokens[i].Item2 : null))).ToArray(); - } - - protected override Tuple> GetAction(HashSet state) - { - // If none of the included states are accepting states we will return null to signal that there is no appropriate - // action to take - if (!state.Any(f => f.AcceptState)) - { - return null; - } - - // Get the first applicable action. This returns null of there is no action defined but there are accepting - // states. This is fine, this means an ignored token. - var action = actions.FirstOrDefault(f => state.Contains(f.Item1)); - return action != null && action.Item2.Item2 != null ? action.Item2 : new Tuple>(int.MinValue, null); - } - - protected override bool ReachedTermination(HashSet nextState) - { - return !nextState.Any(); - } - - protected override HashSet GetNextState(HashSet state, char input) - { - var nextState = new HashSet(); - nextState.UnionWith(nfa.Closure( - nfa.Transitions.Where(t => t.ValidInput.ContainsChar(input) && state.Contains(t.From)).Select(f => f.To). 
- ToArray())); - return nextState; - } - - protected override HashSet GetInitialState() - { - var initialState = new HashSet(); - initialState.UnionWith(nfa.Closure(new[] { nfa.StartState })); - return initialState; - } - } +using System.Collections.Generic; +using System.Linq; +using System; + +using Piglet.Lexer.Construction; + +namespace Piglet.Lexer.Runtime +{ + internal sealed class NfaLexer + : LexerBase> + { + private readonly NFA _nfa; + private readonly (NFA.State state, (int number, Func? action)? action)?[] _actions; + + + public NfaLexer(NFA nfa, IEnumerable nfas, List<(string regex, Func action)> tokens, int endOfInputTokenNumber) + : base(endOfInputTokenNumber) + { + _nfa = nfa; + _actions = nfas.Select((n, i) => ((NFA.State, (int, Func?)?)?)(n.States.Single(f => f.AcceptState), (i, i < tokens.Count ? tokens[i].action : null))).ToArray(); + } + + protected override (int number, Func? action)? GetAction(HashSet state) + { + // If none of the included states are accepting states we will return null to signal that there is no appropriate action to take + if (!state.Any(f => f.AcceptState)) + return null; + + // Get the first applicable action. This returns null if there is no action defined but there are accepting states. + // This is fine, this means an ignored token. + (NFA.State state, (int index, Func function)? action)? 
tuple = _actions?.FirstOrDefault(f => state.Contains(f.Value.state)); + + if (tuple?.action is { function: { } }) + return tuple.Value.action; + + return (int.MinValue, null); + } + + protected override bool ReachedTermination(HashSet nextState) => !nextState.Any(); + + protected override HashSet GetNextState(HashSet state, char input) + { + HashSet nextState = new HashSet(); + + nextState.UnionWith(_nfa.Closure( + (from t in _nfa.Transitions + where t.ValidInput.ContainsChar(input) + where state.Contains(t.From) + select t.To).ToArray() + )); + + return nextState; + } + + protected override HashSet GetInitialState() + { + if (_nfa.StartState is NFA.State start) + { + HashSet initialState = new HashSet(); + + initialState.UnionWith(_nfa.Closure(new[] { start })); + + return initialState; + } + else + throw new InvalidOperationException("The start state must not be null."); + } + } } \ No newline at end of file diff --git a/Piglet/Lexer/Runtime/TabularLexer.cs b/Piglet/Lexer/Runtime/TabularLexer.cs index 69e920a..119c22f 100644 --- a/Piglet/Lexer/Runtime/TabularLexer.cs +++ b/Piglet/Lexer/Runtime/TabularLexer.cs @@ -1,36 +1,22 @@ -using System; - -namespace Piglet.Lexer.Runtime -{ - internal class TabularLexer : LexerBase - { - private readonly TransitionTable transitionTable; - - public TabularLexer(TransitionTable transitionTable, int endOfInputTokenNumber) - : base(endOfInputTokenNumber) - { - - this.transitionTable = transitionTable; - } - - protected override bool ReachedTermination(int nextState) - { - return nextState == -1; - } - - protected override int GetNextState(int state, char c) - { - return transitionTable[state, c]; - } - - protected override Tuple> GetAction(int state) - { - return transitionTable.GetAction(state); - } - - protected override int GetInitialState() - { - return 0; - } - } +using System; + +namespace Piglet.Lexer.Runtime +{ + internal sealed class TabularLexer + : LexerBase + { + private readonly TransitionTable _transitionTable; + + 
+ public TabularLexer(TransitionTable transitionTable, int endOfInputTokenNumber) + : base(endOfInputTokenNumber) => _transitionTable = transitionTable; + + protected override bool ReachedTermination(int nextState) => nextState == -1; + + protected override int GetNextState(int state, char c) => _transitionTable[state, c]; + + protected override (int number, Func? action)? GetAction(int state) => _transitionTable.GetAction(state); + + protected override int GetInitialState() => 0; + } } \ No newline at end of file diff --git a/Piglet/Lexer/Runtime/TransitionTable.cs b/Piglet/Lexer/Runtime/TransitionTable.cs index e8f1428..67643dc 100644 --- a/Piglet/Lexer/Runtime/TransitionTable.cs +++ b/Piglet/Lexer/Runtime/TransitionTable.cs @@ -1,144 +1,115 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using Piglet.Common; -using Piglet.Lexer.Construction; - -namespace Piglet.Lexer.Runtime -{ - internal class TransitionTable - { - private readonly ITable2D table; - private readonly Tuple>[] actions; - private readonly char[] inputRangeEnds; - private readonly int[] asciiIndices; - - public TransitionTable(DFA dfa, IList nfas, IList>> tokens) - { - // Get a list of all valid input ranges that are distinct. - // This will fill up the entire spectrum from 0 to max char - // Sort these ranges so that they start with the lowest to highest start - var allValidRanges = - nfas.Select( - f => - f.Transitions.Aggregate(Enumerable.Empty(), (acc, a) => acc.Union(a.ValidInput.Ranges))) - .Aggregate((acc, a) => acc.Union(a)) - .OrderBy(f => f.From) - .ToList(); - - // This list might not be properly terminated at both ends. This happens if there - // never is anything that accepts any character. 
- char start = allValidRanges.First().From; - if (start != '\0') - { - // Add a range that goes from \0 to the character before start - allValidRanges.Insert(0, new CharRange { From = '\0', To = (char) (start - 1)}); - } - - char end = allValidRanges.Last().To; - if (end != char.MaxValue) - { - allValidRanges.Add(new CharRange { From = (char) (end + 1), To = char.MaxValue}); - } - - // Create a 2D table - // First dimension is the number of states found in the DFA - // Second dimension is number of distinct character ranges - var uncompressed = new short[dfa.States.Count(),allValidRanges.Count()]; - - // Fill table with -1 - for (int i = 0; i < dfa.States.Count(); ++i ) - { - for (int j = 0; j < allValidRanges.Count(); ++j) - { - uncompressed[i, j] = -1; - } - } - - // Save the ends of the input ranges into an array - inputRangeEnds = allValidRanges.Select(f => f.To).ToArray(); - actions = new Tuple>[dfa.States.Count]; - - foreach (var state in dfa.States) - { - // Store to avoid problems with modified closure - DFA.State state1 = state; - foreach (var transition in dfa.Transitions.Where(f => f.From == state1)) - { - // Set the table entry - foreach (var range in transition.ValidInput.Ranges) - { - int ix = allValidRanges.BinarySearch(range); - uncompressed[state.StateNumber, ix] = (short) transition.To.StateNumber; - } - } - - // If this is an accepting state, set the action function to be - // the FIRST defined action function if multiple ones match - if (state.NfaStates.Any(f => f.AcceptState)) - { - // Find the lowest ranking NFA which has the accepting state in it - for (int tokenNumber = 0; tokenNumber < nfas.Count(); ++tokenNumber) - { - NFA nfa = nfas[tokenNumber]; - - if (nfa.States.Intersect(state.NfaStates.Where(f => f.AcceptState)).Any()) - { - // Match - // This might be a token that we ignore. This is if the tokenNumber >= number of tokens - // since the ignored tokens are AFTER the normal tokens. 
If this is so, set the action func to - // int.MinValue, NULL to signal that the parsing should restart without reporting errors - if (tokenNumber >= tokens.Count()) - { - actions[state.StateNumber] = new Tuple>(int.MinValue, null); - } - else - { - actions[state.StateNumber] = new Tuple>( - tokenNumber, tokens[tokenNumber].Item2); - } - break; - } - } - } - } - - table = new CompressedTable(uncompressed); - asciiIndices = new int[256]; - for (int i = 0; i < asciiIndices.Length; ++i) - { - asciiIndices[i] = FindTableIndexFromRanges((char)i); - } - } - - public int this[int state, char c] - { - get - { - // Determine the corrent input range index into the table - int tableIndex = FindTableIndex(c); - return table[state, tableIndex]; - } - } - - private int FindTableIndex(char c) - { - return c < asciiIndices.Length ? asciiIndices[c] : FindTableIndexFromRanges(c); - } - - private int FindTableIndexFromRanges(char c) - { - int ix = Array.BinarySearch(inputRangeEnds, c); - if (ix < 0) - { - ix = ~ix; - } - return ix; - } - - public Tuple> GetAction(int state) - { - return actions[state]; - } - } +using System.Collections.Generic; +using System.Linq; +using System; + +using Piglet.Lexer.Construction; +using Piglet.Common; + +namespace Piglet.Lexer.Runtime +{ + internal sealed class TransitionTable + { + private readonly (int number, Func? action)?[] _actions; + private readonly char[] _inputRangeEnds; + private readonly int[] _asciiIndices; + private readonly ITable2D _table; + + + public TransitionTable(DFA dfa, IList nfas, IList<(string regex, Func action)> tokens) + { + // Get a list of all valid input ranges that are distinct. 
+ // This will fill up the entire spectrum from 0 to max char + // Sort these ranges so that they start with the lowest to highest start + List allValidRanges = + nfas.Select(f => + f.Transitions.Aggregate(Enumerable.Empty(), (acc, a) => acc.Union(a.ValidInput.Ranges))) + .Aggregate((acc, a) => acc.Union(a)) + .OrderBy(f => f.From) + .ToList(); + + // This list might not be properly terminated at both ends. This happens if there + // never is anything that accepts any character. + char start = allValidRanges.First().From; + + if (start != '\0') + allValidRanges.Insert(0, new CharRange { From = '\0', To = (char)(start - 1) }); // Add a range that goes from \0 to the character before start + + char end = allValidRanges.Last().To; + + if (end != char.MaxValue) + allValidRanges.Add(new CharRange { From = (char)(end + 1), To = char.MaxValue }); + + // Create a 2D table + // First dimension is the number of states found in the DFA + // Second dimension is number of distinct character ranges + short[,] uncompressed = new short[dfa.States.Count(), allValidRanges.Count()]; + + // Fill table with -1 + for (int i = 0; i < dfa.States.Count(); ++i) + for (int j = 0; j < allValidRanges.Count(); ++j) + uncompressed[i, j] = -1; + + // Save the ends of the input ranges into an array + _inputRangeEnds = allValidRanges.Select(f => f.To).ToArray(); + _actions = new (int, Func?)?[dfa.States.Count]; + + foreach (DFA.State state in dfa.States) + { + // Store to avoid problems with modified closure + DFA.State state1 = state; + + foreach (Transition transition in dfa.Transitions.Where(f => f.From == state1)) + // Set the table entry + foreach (CharRange range in transition.ValidInput.Ranges) + { + int ix = allValidRanges.BinarySearch(range); + + uncompressed[state.StateNumber, ix] = (short) transition.To.StateNumber; + } + + // If this is an accepting state, set the action function to be + // the FIRST defined action function if multiple ones match + if (state.NfaStates.Any(f => 
f.AcceptState)) + // Find the lowest ranking NFA which has the accepting state in it + for (int tokenNumber = 0; tokenNumber < nfas.Count(); ++tokenNumber) + { + NFA nfa = nfas[tokenNumber]; + + if (nfa.States.Intersect(state.NfaStates.Where(f => f.AcceptState)).Any()) + { + // Match + // This might be a token that we ignore. This is if the tokenNumber >= number of tokens + // since the ignored tokens are AFTER the normal tokens. If this is so, set the action func to + // int.MinValue, NULL to signal that the parsing should restart without reporting errors + _actions[state.StateNumber] = tokenNumber >= tokens.Count() ? (int.MinValue, null) : (tokenNumber, tokens[tokenNumber].action); + + break; + } + } + } + + _table = new CompressedTable(uncompressed); + _asciiIndices = new int[256]; + + for (int i = 0; i < _asciiIndices.Length; ++i) + _asciiIndices[i] = FindTableIndexFromRanges((char)i); + } + + // Determine the corrent input range index into the table + public int this[int state, char c] => _table[state, FindTableIndex(c)]; + + private int FindTableIndex(char c) => c < _asciiIndices.Length ? _asciiIndices[c] : FindTableIndexFromRanges(c); + + private int FindTableIndexFromRanges(char c) + { + int ix = Array.BinarySearch(_inputRangeEnds, c); + + if (ix < 0) + ix = ~ix; + + return ix; + } + + public (int number, Func? action)? 
GetAction(int state) => _actions[state]; + } } \ No newline at end of file diff --git a/Piglet/Parser/Configuration/Fluent/FluentExpression.cs b/Piglet/Parser/Configuration/Fluent/FluentExpression.cs index 5d7d19a..db7f8f2 100644 --- a/Piglet/Parser/Configuration/Fluent/FluentExpression.cs +++ b/Piglet/Parser/Configuration/Fluent/FluentExpression.cs @@ -1,71 +1,47 @@ -using System; using System.Globalization; +using System; namespace Piglet.Parser.Configuration.Fluent { - internal class FluentExpression : IExpressionConfigurator, IExpressionReturnConfigurator + internal sealed class FluentExpression + : IExpressionConfigurator + , IExpressionReturnConfigurator { - private readonly ParserConfigurator configurator; - private Terminal terminal; - private string regex; + private readonly ParserConfigurator? _configurator; + private Terminal? _terminal; + private string? _regex; - public FluentExpression(ParserConfigurator configurator) - { - this.configurator = configurator; - } - public FluentExpression(ITerminal terminal) - { - this.terminal = (Terminal) terminal; - } + public FluentExpression(ParserConfigurator configurator) => _configurator = configurator; - public Terminal Terminal - { - get - { - if (terminal == null) - { - throw new ParserConfigurationException("An expression must be fully configured before use!"); - } - return terminal; - } - } + public FluentExpression(ITerminal terminal) => _terminal = (Terminal)terminal; + + public Terminal Terminal => _terminal ?? 
throw new ParserConfigurationException("An expression must be fully configured before use!"); public void ThatMatches() { - var type = typeof (TExpressionType); + Type type = typeof(TExpressionType); + if (type == typeof(int)) - { ThatMatches(@"\d+").AndReturns(f => int.Parse(f)); - } else if (type == typeof(double)) - { ThatMatches(@"\d+(\.\d+)?").AndReturns(f => double.Parse(f, CultureInfo.InvariantCulture)); - } else if (type == typeof(float)) - { ThatMatches(@"\d+(\.\d+)?").AndReturns(f => float.Parse(f)); - } else if (type == typeof(bool)) - { ThatMatches(@"((true)|(false))").AndReturns(f => bool.Parse(f)); - } else - { throw new ParserConfigurationException("Unknown type passed to ThatMatches."); - } } public IExpressionReturnConfigurator ThatMatches(string regex) { - this.regex = regex; + _regex = regex; + return this; } - public void AndReturns(Func func) - { - // Create the terminal now to ensure that the tokens will be created in the right order - terminal = (Terminal) configurator.CreateTerminal(regex, func); - } + // Create the terminal now to ensure that the tokens will be created in the right order + public void AndReturns(Func func) => _terminal = (Terminal?)_configurator?.CreateTerminal(_regex, func); } } \ No newline at end of file diff --git a/Piglet/Parser/Configuration/Fluent/FluentParserConfigurator.cs b/Piglet/Parser/Configuration/Fluent/FluentParserConfigurator.cs index 35719d2..71d1f1f 100644 --- a/Piglet/Parser/Configuration/Fluent/FluentParserConfigurator.cs +++ b/Piglet/Parser/Configuration/Fluent/FluentParserConfigurator.cs @@ -1,160 +1,145 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text.RegularExpressions; -using Piglet.Lexer.Configuration; - -namespace Piglet.Parser.Configuration.Fluent -{ - internal class FluentParserConfigurator : IFluentParserConfigurator - { - private readonly ParserConfigurator configurator; - private readonly List rules; - private readonly Dictionary, NonTerminal> 
listRules; - private readonly Dictionary, NonTerminal> optionalRules; - private readonly List ignored; - - private IExpressionConfigurator quotedString; - private IExpressionConfigurator errorToken; - - public FluentParserConfigurator(ParserConfigurator configurator) - { - this.configurator = configurator; - - rules = new List(); - listRules = new Dictionary, NonTerminal>(); - optionalRules = new Dictionary, NonTerminal>(); - ignored = new List(); - } - - public IRule Rule() - { - var rule = new FluentRule(this, configurator.CreateNonTerminal()); - rules.Add(rule); - return rule; - } - - public IExpressionConfigurator Expression() - { - return new FluentExpression(configurator); - } - - public IExpressionConfigurator QuotedString - { - get - { - if (quotedString == null) - { - quotedString = Expression(); - quotedString.ThatMatches("\"(\\\\.|[^\"])*\"").AndReturns(f => f.Substring(1, f.Length - 2)); - } - return quotedString; - } - } - - public IExpressionConfigurator Error - { - get { return errorToken ?? (errorToken = new FluentExpression(configurator.ErrorToken)); } - } - - public IParser CreateParser() - { - // At this point the underlying parser configurator contains a bunch of nonterminals - // It won't contain all of the nonterminals. We are going to replace everything in every rule with the proper - // [non]terminals. Then we are going to generate the parser. 
- foreach (var rule in rules) - { - rule.ConfigureProductions(); - } - - configurator.LexerSettings.CreateLexer = true; - configurator.LexerSettings.EscapeLiterals = true; - configurator.LexerSettings.Ignore = new[] { @"\s+" }.Concat(ignored).ToArray(); - - var parser = configurator.CreateParser(); - parser.Lexer = configurator.CreateLexer(); - - return parser; - } - - private ITerminal[] ParamsToTerminalArray(object[] p) - { - return p.OfType().Select(f => configurator.CreateTerminal(Regex.Escape(f))) - .Concat(p.OfType().Select(f => f.Terminal)).ToArray(); - } - - public void LeftAssociative(params object[] p) - { - configurator.LeftAssociative(ParamsToTerminalArray(p)); - } - - public void RightAssociative(params object[] p) - { - configurator.RightAssociative(ParamsToTerminalArray(p)); - } - - public void NonAssociative(params object[] p) - { - configurator.NonAssociative(ParamsToTerminalArray(p)); - } - - public void Ignore(string ignoreExpression) - { - ignored.Add(ignoreExpression); - } - - public LexerRuntime Runtime { - get { return configurator.LexerSettings.Runtime; } - set { configurator.LexerSettings.Runtime = value; } - } - - public NonTerminal MakeListRule(IRule rule, string separator) - { - var t = new Tuple(rule, separator); - if (listRules.ContainsKey(t)) - return listRules[t]; - - // Create a new nonterminal - var listRule = (NonTerminal)configurator.CreateNonTerminal(); - - if (separator != null) - { - listRule.AddProduction(listRule, separator, ((FluentRule)rule).NonTerminal).SetReduceFunction(f => - { - var list = (List)f[0]; - list.Add((TListType)f[2]); - return list; - }); - } - else - { - listRule.AddProduction(listRule, ((FluentRule)rule).NonTerminal).SetReduceFunction(f => - { - var list = (List)f[0]; - list.Add((TListType)f[1]); - return list; - }); - } - listRule.AddProduction(((FluentRule)rule).NonTerminal).SetReduceFunction(f => new List { (TListType)f[0] }); - - listRules.Add(t, listRule); - return listRule; - } - - public 
NonTerminal MakeOptionalRule(NonTerminal nonTerminal) - { - if (optionalRules.ContainsKey(nonTerminal)) - return optionalRules[nonTerminal]; - - // Makes a new rule - var optionalRule = (NonTerminal)configurator.CreateNonTerminal(); - - optionalRule.AddProduction(nonTerminal).SetReduceFunction(f => f[0]); - optionalRule.AddProduction(); - - optionalRules.Add(nonTerminal, optionalRule); - - return optionalRule; - } - } +using System.Text.RegularExpressions; +using System.Collections.Generic; +using System.Linq; + +using Piglet.Lexer.Configuration; + +namespace Piglet.Parser.Configuration.Fluent +{ + internal sealed class FluentParserConfigurator + : IFluentParserConfigurator + { + private readonly Dictionary<(IRule? rule, string? separator), NonTerminal> _listRules; + private readonly Dictionary, NonTerminal> _optionalRules; + private readonly ParserConfigurator _configurator; + private readonly List _rules; + private readonly List _ignored; + + private IExpressionConfigurator? _quotedString; + private IExpressionConfigurator? _errorToken; + + + public LexerRuntime Runtime + { + get => _configurator.LexerSettings.Runtime; + set => _configurator.LexerSettings.Runtime = value; + } + + public IExpressionConfigurator Error => _errorToken ?? 
(_errorToken = new FluentExpression(_configurator.ErrorToken)); + + public IExpressionConfigurator QuotedString + { + get + { + if (_quotedString is null) + { + _quotedString = Expression(); + _quotedString.ThatMatches("\"(\\\\.|[^\"])*\"").AndReturns(f => f.Substring(1, f.Length - 2)); + } + + return _quotedString; + } + } + + + public FluentParserConfigurator(ParserConfigurator configurator) + { + _configurator = configurator; + _rules = new List(); + _listRules = new Dictionary<(IRule, string), NonTerminal>(); + _optionalRules = new Dictionary, NonTerminal>(); + _ignored = new List(); + } + + public IRule Rule() + { + FluentRule rule = new FluentRule(this, _configurator.CreateNonTerminal()); + + _rules.Add(rule); + + return rule; + } + + public IExpressionConfigurator Expression() => new FluentExpression(_configurator); + + public IParser CreateParser() + { + // At this point the underlying parser configurator contains a bunch of nonterminals + // It won't contain all of the nonterminals. We are going to replace everything in every rule with the proper + // [non]terminals. Then we are going to generate the parser. 
+ foreach (FluentRule rule in _rules) + rule.ConfigureProductions(); + + _configurator.LexerSettings.CreateLexer = true; + _configurator.LexerSettings.EscapeLiterals = true; + _configurator.LexerSettings.Ignore = new[] { @"\s+" }.Concat(_ignored).ToArray(); + + IParser parser = _configurator.CreateParser(); + + parser.Lexer = _configurator.CreateLexer(); + + return parser; + } + + private ITerminal[] ParamsToTerminalArray(object[] p) => p.OfType().Select(f => _configurator.CreateTerminal(Regex.Escape(f))) + .Concat(p.OfType().Select(f => f.Terminal)).ToArray(); + + public void LeftAssociative(params object[] p) => _configurator.LeftAssociative(ParamsToTerminalArray(p)); + + public void RightAssociative(params object[] p) => _configurator.RightAssociative(ParamsToTerminalArray(p)); + + public void NonAssociative(params object[] p) => _configurator.NonAssociative(ParamsToTerminalArray(p)); + + public void Ignore(string ignoreExpression) => _ignored.Add(ignoreExpression); + + public NonTerminal MakeListRule(IRule? rule, string? separator) + { + (IRule? rule, string? 
separator) tuple = (rule, separator); + + if (_listRules.ContainsKey(tuple)) + return _listRules[tuple]; + + // Create a new nonterminal + NonTerminal listRule = (NonTerminal)_configurator.CreateNonTerminal(); + + if (separator != null) + listRule.AddProduction(listRule, separator, ((FluentRule?)rule)?.NonTerminal).SetReduceFunction(f => + { + List list = (List)f[0]; + + list.Add((TListType)f[2]); + + return list; + }); + else + listRule.AddProduction(listRule, ((FluentRule?)rule)?.NonTerminal).SetReduceFunction(f => + { + List list = (List)f[0]; + + list.Add((TListType)f[1]); + + return list; + }); + + listRule.AddProduction(((FluentRule?)rule)?.NonTerminal).SetReduceFunction(f => new List { (TListType)f[0] }); + _listRules.Add(tuple, listRule); + + return listRule; + } + + public NonTerminal MakeOptionalRule(NonTerminal nonTerminal) + { + if (_optionalRules.ContainsKey(nonTerminal)) + return _optionalRules[nonTerminal]; + + // Makes a new rule + NonTerminal optionalRule = (NonTerminal)_configurator.CreateNonTerminal(); + + optionalRule.AddProduction(nonTerminal).SetReduceFunction(f => f[0]); + optionalRule.AddProduction(); + _optionalRules.Add(nonTerminal, optionalRule); + + return optionalRule; + } + } } \ No newline at end of file diff --git a/Piglet/Parser/Configuration/Fluent/FluentRule.cs b/Piglet/Parser/Configuration/Fluent/FluentRule.cs index d759e1d..471d89d 100644 --- a/Piglet/Parser/Configuration/Fluent/FluentRule.cs +++ b/Piglet/Parser/Configuration/Fluent/FluentRule.cs @@ -1,55 +1,65 @@ -using System; using System.Collections.Generic; using System.Dynamic; using System.Linq; +using System; namespace Piglet.Parser.Configuration.Fluent { - internal class FluentRule : IRuleByConfigurator, IRule, IListItemConfigurator, IOptionalAsConfigurator, IMaybeListNamed + internal sealed class FluentRule + : IRuleByConfigurator + , IRule + , IListItemConfigurator + , IOptionalAsConfigurator + , IMaybeListNamed { - private readonly FluentParserConfigurator 
configurator; - private readonly NonTerminal nonTerminal; - private readonly List> productionList; - private readonly List> funcList; + private readonly FluentParserConfigurator _configurator; + private readonly NonTerminal _nonTerminal; + private readonly List> _productionList; + private readonly List?> _funcList; - private class ProductionElement - { - public object Symbol; - public string Name; - }; - private abstract class ListOfRule : ProductionElement - { - public string Separator; - public bool Optional; + private List CurrentProduction => _productionList[_productionList.Count - 1]; - public abstract NonTerminal MakeListRule(FluentParserConfigurator fluentParserConfigurator); - }; - - private class ListOfTypedObjectRule : ListOfRule + public IListItemConfigurator Optional { - public override NonTerminal MakeListRule(FluentParserConfigurator fluentParserConfigurator) + get { - return fluentParserConfigurator.MakeListRule((IRule)Symbol, Separator); + ((ListOfRule)CurrentProduction[CurrentProduction.Count - 1]).Optional = true; + + return this; } } - public FluentRule(FluentParserConfigurator configurator, INonTerminal nonTerminal) - { - this.configurator = configurator; - this.nonTerminal = (NonTerminal)nonTerminal; - productionList = new List> { new List() }; - funcList = new List> { null }; - } + public INonTerminal NonTerminal => _nonTerminal; - private List CurrentProduction + public IRuleByConfigurator Or { get { - return productionList[productionList.Count - 1]; + // Finish the current rule + _productionList.Add(new List()); + + _funcList.Add(null); + + return this; } } + public IRuleByConfigurator IsMadeUp => this; + + public IListItemConfigurator ThatIs => this; + + public IRuleByConfigurator Followed => this; + + + public FluentRule(FluentParserConfigurator configurator, INonTerminal nonTerminal) + { + _configurator = configurator; + _nonTerminal = (NonTerminal)nonTerminal; + _productionList = new List> { new List() }; + _funcList = new List?> { null 
}; + } + public IOptionalAsConfigurator By(string literal) { CurrentProduction.Add(new ProductionElement { Symbol = literal }); @@ -58,7 +68,7 @@ public IOptionalAsConfigurator By(string literal) public IOptionalAsConfigurator By() { - var e = configurator.Expression(); + IExpressionConfigurator e = _configurator.Expression(); e.ThatMatches(); CurrentProduction.Add(new ProductionElement { Symbol = e }); return this; @@ -76,10 +86,7 @@ public IOptionalAsConfigurator By(IRule rule) return this; } - public IMaybeListNamed ByListOf(IRule listElement) - { - return ByListOf(listElement); - } + public IMaybeListNamed ByListOf(IRule listElement) => ByListOf(listElement); public IMaybeListNamed ByListOf(IRule listElement) { @@ -87,25 +94,9 @@ public IMaybeListNamed ByListOf(IRule listElement) return this; } - public IRuleByConfigurator Or - { - get - { - // Finish the current rule - productionList.Add(new List()); - funcList.Add(null); - return this; - } - } - - public IRuleByConfigurator Followed - { - get { return this; } - } - public IMaybeNewRuleConfigurator WhenFound(Func func) { - funcList[funcList.Count - 1] = func; + _funcList[_funcList.Count - 1] = func; return this; } @@ -115,43 +106,18 @@ public IRuleSequenceConfigurator As(string name) return this; } - public IRuleByConfigurator IsMadeUp - { - get { return this; } - } - - public IListItemConfigurator ThatIs - { - get { return this; } - } - IListRuleSequenceConfigurator IMaybeListNamed.As(string name) { CurrentProduction[CurrentProduction.Count - 1].Name = name; + return this; } public IListItemConfigurator SeparatedBy(string separator) { ((ListOfRule)CurrentProduction[CurrentProduction.Count - 1]).Separator = separator; - return this; - } - public IListItemConfigurator Optional - { - get - { - ((ListOfRule)CurrentProduction[CurrentProduction.Count - 1]).Optional = true; - return this; - } - } - - public INonTerminal NonTerminal - { - get - { - return nonTerminal; - } + return this; } public void 
ConfigureProductions() @@ -160,24 +126,24 @@ public void ConfigureProductions() // and sends that to the other configuration interface. // Use the nonterminal to configure the production - for (var productionIndex = 0; productionIndex < productionList.Count; ++productionIndex) + for (int productionIndex = 0; productionIndex < _productionList.Count; ++productionIndex) { - var production = productionList[productionIndex]; + List production = _productionList[productionIndex]; bool isErrorRule = false; for (int i = 0; i < production.Count; ++i) { - var part = production[i]; + ProductionElement part = production[i]; + if (part is ListOfRule) { // This will create new rules, we want to reduce production[i] - var listRule = (ListOfRule)part; - var listNonTerminal = listRule.MakeListRule(configurator); + ListOfRule listRule = (ListOfRule)part; + NonTerminal listNonTerminal = listRule.MakeListRule(_configurator); if (listRule.Optional) - { - listNonTerminal = configurator.MakeOptionalRule(listNonTerminal); - } + listNonTerminal = _configurator.MakeOptionalRule(listNonTerminal); + production[i].Symbol = listNonTerminal; } else if (part.Symbol is string) @@ -186,67 +152,77 @@ public void ConfigureProductions() // Do nothing, this is already handled. } else if (part.Symbol is FluentRule) - { - production[i].Symbol = ((FluentRule)part.Symbol).nonTerminal; - } + production[i].Symbol = ((FluentRule)part.Symbol)._nonTerminal; else if (part.Symbol is FluentExpression) { - isErrorRule |= part.Symbol == configurator.Error; + isErrorRule |= part.Symbol == _configurator.Error; production[i].Symbol = ((FluentExpression)part.Symbol).Terminal; } else - { - throw new ParserConfigurationException( - "Unknown entity found in production rule list. This should never happen"); - } + throw new ParserConfigurationException("Unknown entity found in production rule list. 
This should never happen"); } - var newProduction = nonTerminal.AddProduction(production.Select(f => f.Symbol).ToArray()); + IProduction newProduction = _nonTerminal.AddProduction(production.Select(f => f.Symbol).ToArray()); // If there is no specific rule specified. - var func = funcList[productionIndex]; - if (func == null) + Func? func = _funcList[productionIndex]; + + if (func is null) { if (production.Count == 1) - { // Use default rule where all rules of length 1 will autoreduce to the // first propertys semantic value newProduction.SetReduceFunction(f => f[0]); - } } else { // Specific function found. This needs to be wrapped in another function // which translates the index parameters into a dynamic object by the property names - var indexNames = production.Select((f, index) => new Tuple(index, f.Name)).Where(f => f.Item2 != null).ToArray(); + (int index, string name)[] indexNames = production.Select((f, i) => (i, f.Name)).Where(f => f.Item2 is { }).ToArray()!; if (isErrorRule) - { newProduction.SetErrorFunction((e, f) => func(CreateExpandoObject(f, e, indexNames))); - } else - { newProduction.SetReduceFunction(f => func(CreateExpandoObject(f, null, indexNames))); - } } } } - private static ExpandoObject CreateExpandoObject(object[] f, object error, Tuple[] indexNames) + private static ExpandoObject CreateExpandoObject(object[] f, object? error, (int index, string name)[] indexNames) { - var expandoObject = new ExpandoObject(); - var dictionary = ((IDictionary)expandoObject); + ExpandoObject expandoObject = new ExpandoObject(); + IDictionary dic = expandoObject; - foreach (var indexName in indexNames) - { - dictionary.Add(indexName.Item2, f[indexName.Item1]); - } + foreach ((int idx, string name) in indexNames) + dic[name] = f[idx]; + + if (error is { } e) + dic["Error"] = e; - if (error != null) - { - dictionary["Error"] = error; - } return expandoObject; } + + + private class ProductionElement + { + public object? Symbol; + public string? 
Name; + }; + + private abstract class ListOfRule + : ProductionElement + { + public string? Separator; + public bool Optional; + + + public abstract NonTerminal MakeListRule(FluentParserConfigurator fluentParserConfigurator); + }; + + private sealed class ListOfTypedObjectRule + : ListOfRule + { + public override NonTerminal MakeListRule(FluentParserConfigurator fluentParserConfigurator) => fluentParserConfigurator.MakeListRule(Symbol as IRule, Separator); + } } } diff --git a/Piglet/Parser/Configuration/Fluent/IFluentParserConfigurator.cs b/Piglet/Parser/Configuration/Fluent/IFluentParserConfigurator.cs index f2da41e..f848688 100644 --- a/Piglet/Parser/Configuration/Fluent/IFluentParserConfigurator.cs +++ b/Piglet/Parser/Configuration/Fluent/IFluentParserConfigurator.cs @@ -1,4 +1,5 @@ using System; + using Piglet.Lexer.Configuration; namespace Piglet.Parser.Configuration.Fluent @@ -6,7 +7,8 @@ namespace Piglet.Parser.Configuration.Fluent /// /// A configuration object for creating fluently configured parsers. /// - public interface IFluentParserConfigurator : IHideObjectMembers + public interface IFluentParserConfigurator + : IHideObjectMembers { /// /// Create a new rule @@ -71,7 +73,8 @@ public interface IFluentParserConfigurator : IHideObjectMembers /// /// A rule is a configurable rule entity. 
/// - public interface IRule : IHideObjectMembers + public interface IRule + : IHideObjectMembers { /// /// Specify what this rule is made up by @@ -82,7 +85,8 @@ public interface IRule : IHideObjectMembers /// /// An expression is a terminal token, and this is the configurator object for setting what the expression should match /// - public interface IExpressionConfigurator : IHideObjectMembers + public interface IExpressionConfigurator + : IHideObjectMembers { /// /// Match a type @@ -101,7 +105,8 @@ public interface IExpressionConfigurator : IHideObjectMembers /// /// Allows you to specify the return of the expression /// - public interface IExpressionReturnConfigurator : IHideObjectMembers + public interface IExpressionReturnConfigurator + : IHideObjectMembers { /// /// Specify what the expression, when matched should return. @@ -112,7 +117,8 @@ public interface IExpressionReturnConfigurator : IHideObjectMembers } #pragma warning disable 1591 - public interface IRuleByConfigurator : IHideObjectMembers + public interface IRuleByConfigurator + : IHideObjectMembers #pragma warning restore 1591 { /// @@ -164,7 +170,8 @@ public interface IRuleByConfigurator : IHideObjectMembers #pragma warning disable 1591 - public interface IOptionalAsConfigurator : IRuleSequenceConfigurator + public interface IOptionalAsConfigurator + : IRuleSequenceConfigurator #pragma warning restore 1591 { /// @@ -176,7 +183,8 @@ public interface IOptionalAsConfigurator : IRuleSequenceConfigurator } #pragma warning disable 1591 - public interface IMaybeNewRuleConfigurator : IHideObjectMembers + public interface IMaybeNewRuleConfigurator + : IHideObjectMembers #pragma warning restore 1591 { /// @@ -186,7 +194,8 @@ public interface IMaybeNewRuleConfigurator : IHideObjectMembers } #pragma warning disable 1591 - public interface IRuleSequenceConfigurator : IMaybeNewRuleConfigurator + public interface IRuleSequenceConfigurator + : IMaybeNewRuleConfigurator #pragma warning restore 1591 { /// @@ 
-205,7 +214,8 @@ public interface IRuleSequenceConfigurator : IMaybeNewRuleConfigurator } #pragma warning disable 1591 - public interface IMaybeListNamed : IListRuleSequenceConfigurator + public interface IMaybeListNamed + : IListRuleSequenceConfigurator #pragma warning restore 1591 { /// @@ -218,7 +228,8 @@ public interface IMaybeListNamed : IListRuleSequenceConfigurator } #pragma warning disable 1591 - public interface IListRuleSequenceConfigurator : IRuleSequenceConfigurator + public interface IListRuleSequenceConfigurator + : IRuleSequenceConfigurator #pragma warning restore 1591 { /// @@ -228,7 +239,8 @@ public interface IListRuleSequenceConfigurator : IRuleSequenceConfigurator } #pragma warning disable 1591 - public interface IListItemConfigurator : IRuleSequenceConfigurator + public interface IListItemConfigurator + : IRuleSequenceConfigurator #pragma warning restore 1591 { /// diff --git a/Piglet/Parser/Configuration/Fluent/IHideObjectMembers.cs b/Piglet/Parser/Configuration/Fluent/IHideObjectMembers.cs index b2beca7..4eb7090 100644 --- a/Piglet/Parser/Configuration/Fluent/IHideObjectMembers.cs +++ b/Piglet/Parser/Configuration/Fluent/IHideObjectMembers.cs @@ -1,5 +1,6 @@ using System; using System.ComponentModel; +using System.Diagnostics.CodeAnalysis; namespace Piglet.Parser.Configuration.Fluent { @@ -15,6 +16,7 @@ public interface IHideObjectMembers [EditorBrowsable(EditorBrowsableState.Never)] + [return: MaybeNull] string ToString(); [EditorBrowsable(EditorBrowsableState.Never)] diff --git a/Piglet/Parser/Configuration/Generic/ParsingUitilities.cs b/Piglet/Parser/Configuration/Generic/ParsingUitilities.cs new file mode 100644 index 0000000..e593f2e --- /dev/null +++ b/Piglet/Parser/Configuration/Generic/ParsingUitilities.cs @@ -0,0 +1,1817 @@ + +/////////////////////////////////////////////////////////////////////// +// AUTOGENERATED 2020-06-11 22:00:48.625802 // +// All your changes to this file will be lost upon re-generation. 
// +/////////////////////////////////////////////////////////////////////// + +using System.Diagnostics.CodeAnalysis; +using System.Collections.Generic; +using System.Linq; +using System; + +using Piglet.Parser.Construction; +using Piglet.Lexer.Runtime; +using Piglet.Lexer; + + +namespace Piglet.Parser.Configuration.Generic +{ + /// + /// Represents an abstract generic parser constructor. + /// + /// The parser based on this constructor will return a parsed value of the type . + /// + /// The generic value return type of the parser. + public abstract class ParserConstructor + { + private ParserWrapper? _parser = null; + private volatile int _ntcounter = 0; + + /// + /// The parser configuration. + /// + public IParserConfigurator Configurator { get; } + + + /// + /// Creates a new generic parser constructor with the default parser configuration. + /// + public ParserConstructor() + : this(ParserFactory.Configure()) + { + } + + /// + /// Creates a new generic parser constructor with the given parser configuration. + /// + /// Parser configuration. + public ParserConstructor(IParserConfigurator configurator) => Configurator = configurator; + + /// + /// Creates a new non-terminal symbol with the given generic semantic value and name. + /// + /// Generic semantic value stored inside the new non-terminal symbol. + /// The name of the new non-terminal symbol. + /// The newly created non-terminal symbol. + protected NonTerminalWrapper CreateNonTerminal(string name) => new NonTerminalWrapper(Configurator.CreateNonTerminal(name)); + + /// + /// Creates a new non-terminal symbol with the given generic semantic value and the default name for non-terminals ("NT..."). + /// + /// Generic semantic value stored inside the new non-terminal symbol. + /// The newly created non-terminal symbol. 
+ protected NonTerminalWrapper CreateNonTerminal() => CreateNonTerminal($"NT{++_ntcounter}"); + + /// + /// Creates a new terminal symbol associated with the given regex string and generic value. + /// + /// Generic semantic value stored inside the new terminal symbol. + /// The regex string associated with the terminal symbol. + /// The value stored inside the new terminal value. + /// The newly created terminal symbol. + protected TerminalWrapper CreateTerminal(string regex, T value) => CreateTerminal(regex, _ => value); + + /// + /// Creates a new terminal symbol associated with the given regex string and the function providing the generic value. + /// + /// Generic semantic value stored inside the new terminal symbol. + /// The regex string associated with the terminal symbol. + /// The function providing the generic value represented by the terminal symbol. + /// The newly created terminal symbol. + protected TerminalWrapper CreateTerminal(string regex, Func func) => new TerminalWrapper(Configurator.CreateTerminal(regex, s => func(s))); + + /// + /// Creates a new terminal symbol associated with the given regex string and the identity function (of the type ). + /// + /// The regex string associated with the terminal symbol. + /// The newly created terminal symbol. + protected TerminalWrapper CreateTerminal(string regex) => new TerminalWrapper(Configurator.CreateTerminal(regex)); + + /// + /// Sets the precedence for all given symbols in ascending order. The first symbol group is therefore considered to have the lowest precedence and the last symbol group the highest precedence. + /// + /// Ordered collection of groups containing a set of symbols with their corresponding associativity. 
+ protected void SetPrecedenceList(params (AssociativityDirection direction, ITerminalWrapper[] symbols)[] groups) + { + foreach ((AssociativityDirection d, ITerminalWrapper[] s) in groups) + SetAssociativity(d, s); + } + + /// + /// Sets the given associativity to all symbols in the given symbol collection. All sybols will be considered to have the same precedence group. + /// + /// Associativity direction. + /// Target symbols. + /// The precedence group associated with the given symbols. + protected IPrecedenceGroup SetAssociativity(AssociativityDirection dir, params ITerminalWrapper[] symbols) + { + ITerminal[] arr = symbols.Select(s => s.Symbol).ToArray(); + + switch (dir) + { + case AssociativityDirection.Left: + return Configurator.LeftAssociative(arr); + case AssociativityDirection.Right: + return Configurator.RightAssociative(arr); + default: + throw new ArgumentOutOfRangeException(nameof(dir)); + } + } + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the given symbol. + ///
+ /// The generic type of . + /// The generic type of . + /// The non-terminal symbol which gets reduced. + /// The symbol, to which gets reduced. + protected void CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0) => non_terminal.AddProduction(symbol0).SetReduceToFirst(); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 0 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, Func func) => non_terminal.AddProduction().SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 1 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, Func func) => non_terminal.AddProduction(symbol0).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 2 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, Func func) => non_terminal.AddProduction(symbol0, symbol1).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 3 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 4 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 5 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 6 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 7 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 8 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, SymbolWrapper symbol7, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 9 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, SymbolWrapper symbol7, SymbolWrapper symbol8, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 10 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, SymbolWrapper symbol7, SymbolWrapper symbol8, SymbolWrapper symbol9, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 11 given symbols. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. 
+ protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, SymbolWrapper symbol7, SymbolWrapper symbol8, SymbolWrapper symbol9, SymbolWrapper symbol10, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 12 given symbols (symbol0 to symbol11, in order). The call forwards to non_terminal.AddProduction and attaches func through SetReduceFunction. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of non_terminal. + /// The generic type of symbol0. + /// The generic type of symbol1. + /// The generic type of symbol2. + /// The generic type of symbol3. + /// The generic type of symbol4. + /// The generic type of symbol5. + /// The generic type of symbol6. + /// The generic type of symbol7. + /// The generic type of symbol8. + /// The generic type of symbol9. + /// The generic type of symbol10. + /// The generic type of symbol11. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "non_terminal". + /// The newly created production rule.
+ protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, SymbolWrapper symbol7, SymbolWrapper symbol8, SymbolWrapper symbol9, SymbolWrapper symbol10, SymbolWrapper symbol11, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 13 given symbols (symbol0 to symbol12, in order). The call forwards to non_terminal.AddProduction and attaches func through SetReduceFunction. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of non_terminal. + /// The generic type of symbol0. + /// The generic type of symbol1. + /// The generic type of symbol2. + /// The generic type of symbol3. + /// The generic type of symbol4. + /// The generic type of symbol5. + /// The generic type of symbol6. + /// The generic type of symbol7. + /// The generic type of symbol8. + /// The generic type of symbol9. + /// The generic type of symbol10. + /// The generic type of symbol11. + /// The generic type of symbol12. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The symbol no. 12, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "non_terminal". + /// The newly created production rule.
+ protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, SymbolWrapper symbol7, SymbolWrapper symbol8, SymbolWrapper symbol9, SymbolWrapper symbol10, SymbolWrapper symbol11, SymbolWrapper symbol12, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11, symbol12).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 14 given symbols (symbol0 to symbol13, in order). The call forwards to non_terminal.AddProduction and attaches func through SetReduceFunction. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of non_terminal. + /// The generic type of symbol0. + /// The generic type of symbol1. + /// The generic type of symbol2. + /// The generic type of symbol3. + /// The generic type of symbol4. + /// The generic type of symbol5. + /// The generic type of symbol6. + /// The generic type of symbol7. + /// The generic type of symbol8. + /// The generic type of symbol9. + /// The generic type of symbol10. + /// The generic type of symbol11. + /// The generic type of symbol12. + /// The generic type of symbol13. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The symbol no. 12, to which the current non-terminal symbol gets reduced. + /// The symbol no. 13, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "non_terminal". + /// The newly created production rule.
+ protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, SymbolWrapper symbol7, SymbolWrapper symbol8, SymbolWrapper symbol9, SymbolWrapper symbol10, SymbolWrapper symbol11, SymbolWrapper symbol12, SymbolWrapper symbol13, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11, symbol12, symbol13).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 15 given symbols (symbol0 to symbol14, in order). The call forwards to non_terminal.AddProduction and attaches func through SetReduceFunction. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of non_terminal. + /// The generic type of symbol0. + /// The generic type of symbol1. + /// The generic type of symbol2. + /// The generic type of symbol3. + /// The generic type of symbol4. + /// The generic type of symbol5. + /// The generic type of symbol6. + /// The generic type of symbol7. + /// The generic type of symbol8. + /// The generic type of symbol9. + /// The generic type of symbol10. + /// The generic type of symbol11. + /// The generic type of symbol12. + /// The generic type of symbol13. + /// The generic type of symbol14. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The symbol no. 12, to which the current non-terminal symbol gets reduced. + /// The symbol no. 13, to which the current non-terminal symbol gets reduced. + /// The symbol no. 14, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "non_terminal". + /// The newly created production rule.
+ protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, SymbolWrapper symbol7, SymbolWrapper symbol8, SymbolWrapper symbol9, SymbolWrapper symbol10, SymbolWrapper symbol11, SymbolWrapper symbol12, SymbolWrapper symbol13, SymbolWrapper symbol14, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11, symbol12, symbol13, symbol14).SetReduceFunction(func); + + /// + /// Creates a new production rule on the given non-terminal symbol using the given production function. + ///
+ /// This production represents the reducing of the given non-terminal to the 16 given symbols (symbol0 to symbol15, in order). The call forwards to non_terminal.AddProduction and attaches func through SetReduceFunction. + ///
+ /// The non-terminal symbol which gets reduced. + /// The generic type of non_terminal. + /// The generic type of symbol0. + /// The generic type of symbol1. + /// The generic type of symbol2. + /// The generic type of symbol3. + /// The generic type of symbol4. + /// The generic type of symbol5. + /// The generic type of symbol6. + /// The generic type of symbol7. + /// The generic type of symbol8. + /// The generic type of symbol9. + /// The generic type of symbol10. + /// The generic type of symbol11. + /// The generic type of symbol12. + /// The generic type of symbol13. + /// The generic type of symbol14. + /// The generic type of symbol15. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The symbol no. 12, to which the current non-terminal symbol gets reduced. + /// The symbol no. 13, to which the current non-terminal symbol gets reduced. + /// The symbol no. 14, to which the current non-terminal symbol gets reduced. + /// The symbol no. 15, to which the current non-terminal symbol gets reduced. + /// The generic production function which gets called upon reduction.
The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + /// The newly created production rule. + protected ProductionWrapper CreateProduction(NonTerminalWrapper non_terminal, SymbolWrapper symbol0, SymbolWrapper symbol1, SymbolWrapper symbol2, SymbolWrapper symbol3, SymbolWrapper symbol4, SymbolWrapper symbol5, SymbolWrapper symbol6, SymbolWrapper symbol7, SymbolWrapper symbol8, SymbolWrapper symbol9, SymbolWrapper symbol10, SymbolWrapper symbol11, SymbolWrapper symbol12, SymbolWrapper symbol13, SymbolWrapper symbol14, SymbolWrapper symbol15, Func func) => non_terminal.AddProduction(symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8, symbol9, symbol10, symbol11, symbol12, symbol13, symbol14, symbol15).SetReduceFunction(func); + + /// + /// Creates a new parser based on the current configurator and returns it. On the first call a fresh non-terminal is created, passed to Construct, registered as the start symbol, and the resulting parser is cached. + /// + /// Note: Only the first call of CreateParser creates a new parser; later calls return the cached instance. If you wish to reset the generated parser and re-create it, call the method ResetParser beforehand. + /// + /// The constructed parser. + public ParserWrapper CreateParser() + { + if (_parser is null) + { + NonTerminalWrapper nt = CreateNonTerminal(); + + Construct(nt); + + Configurator.SetStartSymbol((INonTerminal)nt.Symbol); + _parser = new ParserWrapper(Configurator.CreateParser()); + } + + return _parser; + } + + /// + /// Resets the constructed parser to null, thereby forcing the parser to be re-constructed based on the current configuration the next time CreateParser is called. + /// + public void ResetParser() => _parser = null; + + /// + /// Constructs the parser. This method must be implemented by every constructor based on ParserConstructor. It is invoked by CreateParser before the start symbol is registered. + /// + /// The non-terminal production start symbol. The value of this symbol will be returned when the constructed parser is executed. + protected abstract void Construct(NonTerminalWrapper start_symbol); + + + /// + /// Represents a wrapper for the generic parser.
+ /// + public sealed class ParserWrapper + { + /// + /// The internal parser instance. + /// + public IParser Parser { get; } + + + internal ParserWrapper(IParser parser) => Parser = parser; + + /// + /// Tries to parse the given string and returns whether the parsing was successful. Only a LexerException raised by Parse is caught and reported as a failure; any other exception propagates to the caller. + /// + /// The input string. + /// The parsed value, or null if a LexerException was caught. + public bool TryParse(string input, [MaybeNullWhen(false)] out ParserResult? result) + { + try + { + result = Parse(input); + } + catch (LexerException) + { + result = null; + } + + return result is { }; + } + + /// + /// Parses the given string and returns the parsed value of the type TOut. The token returned by Parser.ParseTokens is flattened depth-first into a list, each token preceding its child nodes. + /// + /// The input string. + /// The parse result: the symbol value of the first collected token cast to TOut, the input split at '\n', and all collected tokens. + public ParserResult Parse(string input) + { + List> tokens = new List>(); + void aggregate(LexedToken token) + { + tokens.Add(token); + + if (token is LexedNonTerminal nt) + foreach (LexedToken child in nt.ChildNodes) + aggregate(child); + } + + aggregate(Parser.ParseTokens(input)); + + return new ParserResult((TOut)tokens[0].SymbolValue, input.Split('\n'), tokens.ToArray()); + } + } + } + + public sealed class ParserResult + { + public TOut ParsedValue { get; } + public string[] SourceLines { get; } + public LexedToken[] LexedTokens { get; } + + + internal ParserResult(TOut parsedValue, string[] sourceLines, LexedToken[] lexedTokens) + { + ParsedValue = parsedValue; + SourceLines = sourceLines; + LexedTokens = lexedTokens; + } + + public static implicit operator TOut(ParserResult res) => res.ParsedValue; + } + + /// + /// An interface for generic terminal wrappers. + /// + public interface ITerminalWrapper + { + /// + /// The underlying terminal symbol. + /// + ITerminal Symbol { get; } + } + + /// + /// Represents a generic symbol wrapper. + /// + /// The generic type stored inside the symbol. + public class SymbolWrapper + { + /// + /// The underlying (boxed) symbol. + /// + public ISymbol Symbol { get; } + /// + /// The type of the generic value stored inside the symbol.
+ /// + public Type SymbolType => typeof(T); + + + /// + /// Creates a new generic symbol wrapper for the given (boxed) symbol. + /// + /// Boxed symbol. + public SymbolWrapper(ISymbol symbol) => Symbol = symbol; + + /// Returns the string representation of the underlying symbol. + public override string? ToString() => Symbol.ToString(); + } + + /// + /// Represents a generic terminal symbol wrapper. + /// + /// + /// The generic type stored inside the symbol. + public sealed class TerminalWrapper + : SymbolWrapper + , ITerminalWrapper + { + /// The underlying symbol, cast to ITerminal. + ITerminal ITerminalWrapper.Symbol => (ITerminal)Symbol; + + /// + /// Creates a new generic symbol wrapper for the given (boxed) terminal symbol. + /// + /// Boxed terminal symbol. The constructor accepts any ISymbol; the ITerminalWrapper.Symbol property casts it to ITerminal on access. + public TerminalWrapper(ISymbol symbol) + : base(symbol) + { + } + } + + /// + /// Represents a generic non-terminal symbol wrapper. + /// + /// + /// The generic type stored inside the symbol. + public sealed class NonTerminalWrapper + : SymbolWrapper + { + /// + /// Creates a new generic symbol wrapper for the given (boxed) non-terminal symbol. + /// + /// Boxed non-terminal symbol. The constructor accepts any ISymbol; the AddProduction methods cast it to INonTerminal. + public NonTerminalWrapper(ISymbol symbol) + : base(symbol) + { + } + + /// + /// Creates a new (empty) production rule on the current non-terminal symbol and returns it. + /// + /// The newly created production rule. + public ProductionWrapper AddProduction() => new ProductionWrapper(((INonTerminal)Symbol).AddProduction()); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 1 given symbol (sym0). The wrapped symbol is unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 2 given symbols (sym0 to sym1, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 3 given symbols (sym0 to sym2, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 4 given symbols (sym0 to sym3, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 5 given symbols (sym0 to sym4, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The generic type of sym4. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 6 given symbols (sym0 to sym5, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The generic type of sym4. + /// The generic type of sym5. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 7 given symbols (sym0 to sym6, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The generic type of sym4. + /// The generic type of sym5. + /// The generic type of sym6. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 8 given symbols (sym0 to sym7, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The generic type of sym4. + /// The generic type of sym5. + /// The generic type of sym6. + /// The generic type of sym7. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6, SymbolWrapper sym7) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol, sym7.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 9 given symbols (sym0 to sym8, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The generic type of sym4. + /// The generic type of sym5. + /// The generic type of sym6. + /// The generic type of sym7. + /// The generic type of sym8. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6, SymbolWrapper sym7, SymbolWrapper sym8) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol, sym7.Symbol, sym8.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 10 given symbols (sym0 to sym9, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The generic type of sym4. + /// The generic type of sym5. + /// The generic type of sym6. + /// The generic type of sym7. + /// The generic type of sym8. + /// The generic type of sym9. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6, SymbolWrapper sym7, SymbolWrapper sym8, SymbolWrapper sym9) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol, sym7.Symbol, sym8.Symbol, sym9.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 11 given symbols (sym0 to sym10, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The generic type of sym4. + /// The generic type of sym5. + /// The generic type of sym6. + /// The generic type of sym7. + /// The generic type of sym8. + /// The generic type of sym9. + /// The generic type of sym10. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6, SymbolWrapper sym7, SymbolWrapper sym8, SymbolWrapper sym9, SymbolWrapper sym10) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol, sym7.Symbol, sym8.Symbol, sym9.Symbol, sym10.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 12 given symbols (sym0 to sym11, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The generic type of sym4. + /// The generic type of sym5. + /// The generic type of sym6. + /// The generic type of sym7. + /// The generic type of sym8. + /// The generic type of sym9. + /// The generic type of sym10. + /// The generic type of sym11. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. + public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6, SymbolWrapper sym7, SymbolWrapper sym8, SymbolWrapper sym9, SymbolWrapper sym10, SymbolWrapper sym11) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol, sym7.Symbol, sym8.Symbol, sym9.Symbol, sym10.Symbol, sym11.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 13 given symbols (sym0 to sym12, in order). The wrapped symbols are unwrapped and passed to the underlying INonTerminal.AddProduction. + ///
+ /// The generic type of sym0. + /// The generic type of sym1. + /// The generic type of sym2. + /// The generic type of sym3. + /// The generic type of sym4. + /// The generic type of sym5. + /// The generic type of sym6. + /// The generic type of sym7. + /// The generic type of sym8. + /// The generic type of sym9. + /// The generic type of sym10. + /// The generic type of sym11. + /// The generic type of sym12. + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The symbol no. 12, to which the current non-terminal symbol gets reduced. + /// The newly created production rule.
+ public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6, SymbolWrapper sym7, SymbolWrapper sym8, SymbolWrapper sym9, SymbolWrapper sym10, SymbolWrapper sym11, SymbolWrapper sym12) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol, sym7.Symbol, sym8.Symbol, sym9.Symbol, sym10.Symbol, sym11.Symbol, sym12.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 14 given symbols. + ///
+ /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The symbol no. 12, to which the current non-terminal symbol gets reduced. + /// The symbol no. 13, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. 
+ public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6, SymbolWrapper sym7, SymbolWrapper sym8, SymbolWrapper sym9, SymbolWrapper sym10, SymbolWrapper sym11, SymbolWrapper sym12, SymbolWrapper sym13) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol, sym7.Symbol, sym8.Symbol, sym9.Symbol, sym10.Symbol, sym11.Symbol, sym12.Symbol, sym13.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 15 given symbols. + ///
+ /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The symbol no. 12, to which the current non-terminal symbol gets reduced. + /// The symbol no. 13, to which the current non-terminal symbol gets reduced. + /// The symbol no. 14, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. 
+ public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6, SymbolWrapper sym7, SymbolWrapper sym8, SymbolWrapper sym9, SymbolWrapper sym10, SymbolWrapper sym11, SymbolWrapper sym12, SymbolWrapper sym13, SymbolWrapper sym14) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol, sym7.Symbol, sym8.Symbol, sym9.Symbol, sym10.Symbol, sym11.Symbol, sym12.Symbol, sym13.Symbol, sym14.Symbol)); + + /// + /// Creates a new production rule on the current non-terminal symbol. + ///
+ /// This production represents the reducing of the current non-terminal to the 16 given symbols. + ///
+ /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The generic type of . + /// The symbol no. 0, to which the current non-terminal symbol gets reduced. + /// The symbol no. 1, to which the current non-terminal symbol gets reduced. + /// The symbol no. 2, to which the current non-terminal symbol gets reduced. + /// The symbol no. 3, to which the current non-terminal symbol gets reduced. + /// The symbol no. 4, to which the current non-terminal symbol gets reduced. + /// The symbol no. 5, to which the current non-terminal symbol gets reduced. + /// The symbol no. 6, to which the current non-terminal symbol gets reduced. + /// The symbol no. 7, to which the current non-terminal symbol gets reduced. + /// The symbol no. 8, to which the current non-terminal symbol gets reduced. + /// The symbol no. 9, to which the current non-terminal symbol gets reduced. + /// The symbol no. 10, to which the current non-terminal symbol gets reduced. + /// The symbol no. 11, to which the current non-terminal symbol gets reduced. + /// The symbol no. 12, to which the current non-terminal symbol gets reduced. + /// The symbol no. 13, to which the current non-terminal symbol gets reduced. + /// The symbol no. 14, to which the current non-terminal symbol gets reduced. + /// The symbol no. 15, to which the current non-terminal symbol gets reduced. + /// The newly created production rule. 
+ public ProductionWrapper AddProduction(SymbolWrapper sym0, SymbolWrapper sym1, SymbolWrapper sym2, SymbolWrapper sym3, SymbolWrapper sym4, SymbolWrapper sym5, SymbolWrapper sym6, SymbolWrapper sym7, SymbolWrapper sym8, SymbolWrapper sym9, SymbolWrapper sym10, SymbolWrapper sym11, SymbolWrapper sym12, SymbolWrapper sym13, SymbolWrapper sym14, SymbolWrapper sym15) => + new ProductionWrapper(((INonTerminal)Symbol).AddProduction(sym0.Symbol, sym1.Symbol, sym2.Symbol, sym3.Symbol, sym4.Symbol, sym5.Symbol, sym6.Symbol, sym7.Symbol, sym8.Symbol, sym9.Symbol, sym10.Symbol, sym11.Symbol, sym12.Symbol, sym13.Symbol, sym14.Symbol, sym15.Symbol)); + + } + + /// + /// Represents an abstract generic production wrapper. + /// + /// The generic return type of the production. This is the type stored inside the non-terminal which gets reduced by the production represented by this wrapper. + public abstract class ProductionWrapperBase + where T : ProductionWrapperBase + { + /// + /// The underlying (boxed) production of this wrapper. + /// + public IProduction Production { get; } + + + /// + /// Creates a new abstract generic production wrapper based on the given (boxed) production. + /// + /// Boxed production instance. + public ProductionWrapperBase(IProduction production) => Production = production; + + /// + /// Configures the production to reduce the non-terminal to the first symbol. This is equivalent to with the index 0. + /// + /// The current instance. + public T SetReduceToFirst() + { + Production.SetReduceToFirst(); + + return (T)this; + } + + /// + /// Sets given precedence group to the current production. + /// + /// Precedence group to the assigned to the current production. + /// The current instance. + public T SetPrecedence(IPrecedenceGroup precedence) + { + Production.SetPrecedence(precedence); + + return (T)this; + } + } + + /// + /// Represents a generic reduce function of the type "(R) -> ". + /// + /// + /// The generic return type of the production. 
+ public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f()); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. 
+ /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. 
+ public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. 
+ public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic input type of the symbol no. 7. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6], (T7)args[7])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic input type of the symbol no. 7. + /// The generic input type of the symbol no. 8. 
+ /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6], (T7)args[7], (T8)args[8])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic input type of the symbol no. 7. + /// The generic input type of the symbol no. 8. + /// The generic input type of the symbol no. 9. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6], (T7)args[7], (T8)args[8], (T9)args[9])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. 
+ /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic input type of the symbol no. 7. + /// The generic input type of the symbol no. 8. + /// The generic input type of the symbol no. 9. + /// The generic input type of the symbol no. 10. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6], (T7)args[7], (T8)args[8], (T9)args[9], (T10)args[10])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic input type of the symbol no. 7. + /// The generic input type of the symbol no. 8. + /// The generic input type of the symbol no. 9. + /// The generic input type of the symbol no. 10. + /// The generic input type of the symbol no. 11. + /// The generic return type of the production. 
+ public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. + public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6], (T7)args[7], (T8)args[8], (T9)args[9], (T10)args[10], (T11)args[11])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic input type of the symbol no. 7. + /// The generic input type of the symbol no. 8. + /// The generic input type of the symbol no. 9. + /// The generic input type of the symbol no. 10. + /// The generic input type of the symbol no. 11. + /// The generic input type of the symbol no. 12. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. 
+ public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6], (T7)args[7], (T8)args[8], (T9)args[9], (T10)args[10], (T11)args[11], (T12)args[12])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic input type of the symbol no. 7. + /// The generic input type of the symbol no. 8. + /// The generic input type of the symbol no. 9. + /// The generic input type of the symbol no. 10. + /// The generic input type of the symbol no. 11. + /// The generic input type of the symbol no. 12. + /// The generic input type of the symbol no. 13. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. 
+ public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6], (T7)args[7], (T8)args[8], (T9)args[9], (T10)args[10], (T11)args[11], (T12)args[12], (T13)args[13])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic input type of the symbol no. 7. + /// The generic input type of the symbol no. 8. + /// The generic input type of the symbol no. 9. + /// The generic input type of the symbol no. 10. + /// The generic input type of the symbol no. 11. + /// The generic input type of the symbol no. 12. + /// The generic input type of the symbol no. 13. + /// The generic input type of the symbol no. 14. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. 
+ public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6], (T7)args[7], (T8)args[8], (T9)args[9], (T10)args[10], (T11)args[11], (T12)args[12], (T13)args[13], (T14)args[14])); + + return this; + } + } + + /// + /// Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, R) -> ". + /// + /// + /// The generic input type of the symbol no. 0. + /// The generic input type of the symbol no. 1. + /// The generic input type of the symbol no. 2. + /// The generic input type of the symbol no. 3. + /// The generic input type of the symbol no. 4. + /// The generic input type of the symbol no. 5. + /// The generic input type of the symbol no. 6. + /// The generic input type of the symbol no. 7. + /// The generic input type of the symbol no. 8. + /// The generic input type of the symbol no. 9. + /// The generic input type of the symbol no. 10. + /// The generic input type of the symbol no. 11. + /// The generic input type of the symbol no. 12. + /// The generic input type of the symbol no. 13. + /// The generic input type of the symbol no. 14. + /// The generic input type of the symbol no. 15. + /// The generic return type of the production. + public sealed class ProductionWrapper + : ProductionWrapperBase> + { + /// + /// Creates a new generic production wrapper based on the given (boxed) production. + /// + /// + /// Boxed production instance. 
+ public ProductionWrapper(IProduction production) + : base(production) + { + } + + public ProductionWrapper SetReduceFunction(Func f) + { + Production.SetReduceFunction(args => f((T0)args[0], (T1)args[1], (T2)args[2], (T3)args[3], (T4)args[4], (T5)args[5], (T6)args[6], (T7)args[7], (T8)args[8], (T9)args[9], (T10)args[10], (T11)args[11], (T12)args[12], (T13)args[13], (T14)args[14], (T15)args[15])); + + return this; + } + } +} diff --git a/Piglet/Parser/Configuration/ILexerSettings.cs b/Piglet/Parser/Configuration/ILexerSettings.cs index 494226f..abb750f 100644 --- a/Piglet/Parser/Configuration/ILexerSettings.cs +++ b/Piglet/Parser/Configuration/ILexerSettings.cs @@ -22,6 +22,11 @@ public interface ILexerSettings /// string[] Ignore { get; set; } + /// + /// Specifies whether the lexer is case-insensitive (), or case-sensitive (). + /// + bool IgnoreCase { get; set; } + /// /// Gets and sets the runtime of the constructed lexer. See the enumeration LexerRuntime for an /// explanation of the valid values. diff --git a/Piglet/Parser/Configuration/INonTerminal.cs b/Piglet/Parser/Configuration/INonTerminal.cs index c311302..56fe36f 100644 --- a/Piglet/Parser/Configuration/INonTerminal.cs +++ b/Piglet/Parser/Configuration/INonTerminal.cs @@ -1,15 +1,20 @@ -using System; - namespace Piglet.Parser.Configuration { + public interface INonTerminal + : ISymbol + { + } + /// /// A non terminal in a given grammar, which may be configured to have productions. /// /// Semantic value of tokens in the grammar - public interface INonTerminal : ISymbol + public interface INonTerminal + : ISymbol + , INonTerminal { /// - /// Creates a production on a given nonterminal. The parts parameter may contains either + /// Creates a production on a given non-terminal. The parts parameter may contains either /// previously declared symbols of the grammar or strings, which are interpreted as terminals /// which may be given unescaped as per the lexer settings of the main configurator object. 
/// If an empty rule is desired you may pass no parameters to the Production. Null must not be passed. diff --git a/Piglet/Parser/Configuration/IParserConfigurator.cs b/Piglet/Parser/Configuration/IParserConfigurator.cs index 8ec2b5f..952fb71 100644 --- a/Piglet/Parser/Configuration/IParserConfigurator.cs +++ b/Piglet/Parser/Configuration/IParserConfigurator.cs @@ -1,4 +1,5 @@ using System; + using Piglet.Parser.Construction; namespace Piglet.Parser.Configuration @@ -11,22 +12,22 @@ namespace Piglet.Parser.Configuration /// Semantic value of tokens public interface IParserConfigurator { - /// - /// Create a new Terminal. If using the built in lexer terminals will be recognized in the order - /// of declaration, unless the topPrecedence is set. A top precedence will be recognized before all other - /// declared terminals. A terminal may not be redefined using different onParse actions. - /// - /// Regular expression to match - /// Action to take on parsing. If null is passed the default action is f => default(T) - /// If true, this terminal takes precedence over previously created terminals - /// A terminal symbol - ITerminal CreateTerminal(string regExp, Func onParse = null, bool topPrecedence = false); - /// - /// Create a new NonTerminal. Production actions may be specified directly, or deferred until later. The + /// Create a new Terminal. If using the built in lexer terminals will be recognized in the order + /// of declaration, unless the topPrecedence is set. A top precedence will be recognized before all other + /// declared terminals. A terminal may not be redefined using different onParse actions. + /// + /// Regular expression to match + /// Action to take on parsing. If null is passed the default action is f => default(T) + /// If true, this terminal takes precedence over previously created terminals + /// The newly created terminal symbol. + ITerminal CreateTerminal(string regExp, Func? 
onParse = null, bool topPrecedence = false); + + /// + /// Creates a new non-terminal. Production actions may be specified directly, or deferred until later. The /// latter is more typical since rules are often recursive in their nature. /// - /// + /// The newly created non-terminal symbol. INonTerminal CreateNonTerminal(); /// @@ -63,10 +64,67 @@ public interface IParserConfigurator /// Symbols to set associativity on IPrecedenceGroup NonAssociative(params ITerminal[] symbols); + /// + /// Sets the given non-terminal symbol as start symbol + /// + /// Non-terminal symbol + void SetStartSymbol(INonTerminal start); + /// /// Creates a parser based on the inputted configuration. If a lexer has been desired as well, this method will also create the lexer. /// /// The created parser IParser CreateParser(); } + + /// + /// A static class containing generic extension methods for . + /// + public static class Extensions + { + /// + /// Semantic value of tokens. + /// The parser configurator. + /// Regular expression to match. + /// The value stored inside the terminal symbol. + /// If true, this terminal takes precedence over previously created terminals. + /// The newly created terminal symbol. + public static ITerminal CreateTerminal(this IParserConfigurator conf, string regex, T val, bool topPrecedence = false) => + conf.CreateTerminal(regex, _ => val, topPrecedence); + + /// + /// Semantic value of tokens. + /// The parser configurator. + /// The non-terminal symbol's name. + /// The newly created non-terminal symbol. + public static INonTerminal CreateNonTerminal(this IParserConfigurator conf, string name) + { + INonTerminal nter = conf.CreateNonTerminal(); + + nter.DebugName = name; + + return nter; + } + + /// + /// Creates a new reduce production on the given non-terminal symbol. 
+ /// + /// The parameter may contains either + /// previously declared symbols of the grammar or strings, which are interpreted as terminals + /// which may be given unescaped as per the lexer settings of the main configurator object. + /// If an empty rule is desired you may pass no parameters to the Production. + /// + /// Semantic value of tokens. + /// The non-terminal symbol. + /// Parts of rule to configure the production + /// A production configurator for the created production, for addition configuration. + public static IProduction AddReduceProduction(this INonTerminal symb, params object[] args) + { + IProduction prod = symb.AddProduction(args); + + prod.SetReduceToFirst(); + + return prod; + } + } } \ No newline at end of file diff --git a/Piglet/Parser/Configuration/IProduction.cs b/Piglet/Parser/Configuration/IProduction.cs index 9eba9f7..892ea64 100644 --- a/Piglet/Parser/Configuration/IProduction.cs +++ b/Piglet/Parser/Configuration/IProduction.cs @@ -1,4 +1,5 @@ using System; +using Piglet.Lexer.Runtime; using Piglet.Parser.Construction; namespace Piglet.Parser.Configuration @@ -9,6 +10,13 @@ namespace Piglet.Parser.Configuration /// Semantic type of tokens public interface IProduction { + /// + /// Specifies a reduction function to be performed when parsing applies the production rule + /// + /// Function that takes each of the elements in the given rule and returns a new element. Elements in + /// input array are ordered the same way as in the production. + void SetReduceFunction(Func[], T> action); + /// /// Specifies a reduction function to be performed when parsing applies the production rule /// @@ -38,6 +46,6 @@ public interface IProduction /// the Error token as predefined by the configurator. 
/// /// Error handler function - void SetErrorFunction(Func errorHandler); + void SetErrorFunction(Func[], T> errorHandler); } } \ No newline at end of file diff --git a/Piglet/Parser/Configuration/ISymbol.cs b/Piglet/Parser/Configuration/ISymbol.cs index a148b38..557d577 100644 --- a/Piglet/Parser/Configuration/ISymbol.cs +++ b/Piglet/Parser/Configuration/ISymbol.cs @@ -1,16 +1,21 @@ namespace Piglet.Parser.Configuration { - /// - /// Base class of symbols in the grammar - /// - /// Semantic token value type - public interface ISymbol + public interface ISymbol { /// /// DebugName is exclusively used for debugging purposes, as the name implies. /// Setting the debug name gives an easier-to-read error reporting when a parser /// configuration fails, but it is entirely optional to set this. /// - string DebugName { get; set; } + string? DebugName { get; set; } + } + + /// + /// Base class of symbols in the grammar + /// + /// Semantic token value type + public interface ISymbol + : ISymbol + { } } \ No newline at end of file diff --git a/Piglet/Parser/Configuration/ITerminal.cs b/Piglet/Parser/Configuration/ITerminal.cs index 51fb946..aceef48 100644 --- a/Piglet/Parser/Configuration/ITerminal.cs +++ b/Piglet/Parser/Configuration/ITerminal.cs @@ -8,12 +8,13 @@ namespace Piglet.Parser.Configuration /// expressions. /// /// Semantic token value type - public interface ITerminal : ISymbol + public interface ITerminal + : ISymbol { /// /// Regular expression this terminal recognizes /// - string RegExp { get; } + string? Regex { get; } /// /// OnParse action to take. 
The input is a string which is the parsed lexeme guaranteed to match diff --git a/Piglet/Parser/Configuration/NonTerminal.cs b/Piglet/Parser/Configuration/NonTerminal.cs index 2ed644c..5f01025 100644 --- a/Piglet/Parser/Configuration/NonTerminal.cs +++ b/Piglet/Parser/Configuration/NonTerminal.cs @@ -1,108 +1,103 @@ -using System; +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; +using Piglet.Lexer.Runtime; using Piglet.Parser.Construction; namespace Piglet.Parser.Configuration { - internal class NonTerminal : Symbol, INonTerminal + internal class NonTerminal + : Symbol + , INonTerminal { private readonly IParserConfigurator configurator; private readonly IList productions; + public NonTerminal(IParserConfigurator configurator) { this.configurator = configurator; + productions = new List(); } - public IEnumerable> ProductionRules - { - get { return productions; } - } + public IEnumerable> ProductionRules => productions; - public IProduction AddProduction(params object[] parts) + public IProduction AddProduction(params object?[] parts) { if (parts.Any(part => !(part is string || part is ISymbol))) - { - throw new ArgumentException("Only string and ISymbol are valid arguments.", "parts"); - } + throw new ArgumentException("Only string and ISymbol are valid arguments.", nameof(parts)); + + NonTerminalProduction nonTerminalProduction = new NonTerminalProduction(configurator, this, parts); - var nonTerminalProduction = new NonTerminalProduction(configurator, this, parts); productions.Add(nonTerminalProduction); return nonTerminalProduction; } - private class NonTerminalProduction : IProduction, IProductionRule + public override string ToString() => + $"{DebugName} --> {string.Join(" | ", from r in ProductionRules select string.Join(" ", from s in r.Symbols select s is ITerminal ? 
$"'{s.DebugName}'" : s.DebugName))}"; + + + internal sealed class NonTerminalProduction + : IProduction + , IProductionRule { - private readonly ISymbol[] symbols; - private readonly INonTerminal resultSymbol; + private readonly INonTerminal _resultSymbol; + + public ISymbol?[] Symbols { get; } + public ISymbol ResultSymbol => _resultSymbol; + public Func[], T>? ReduceAction { get; private set; } + public IPrecedenceGroup? ContextPrecedence { get; private set; } - public ISymbol[] Symbols { get { return symbols; } } - public ISymbol ResultSymbol { get { return resultSymbol; } } - public Func ReduceAction { get; private set; } - public IPrecedenceGroup ContextPrecedence { get; private set; } - public NonTerminalProduction(IParserConfigurator configurator, INonTerminal resultSymbol, object[] symbols) + public NonTerminalProduction(IParserConfigurator configurator, INonTerminal resultSymbol, object?[] symbols) { - this.resultSymbol = resultSymbol; + _resultSymbol = resultSymbol; // Move production symbols to the list - this.symbols = new ISymbol[symbols.Length]; + Symbols = new ISymbol[symbols.Length]; + int i = 0; - foreach (var part in symbols) + + foreach (object? part in symbols) { - if (part is string) + if (part is string regex) { - var regex = (string)part; if (configurator.LexerSettings.EscapeLiterals) - { regex = Regex.Escape(regex); - } - this.symbols[i] = configurator.CreateTerminal(regex, null, true); - this.symbols[i].DebugName = (string)part; // Set debug name to unescaped string, so it's easy on the eyes. + Symbols[i] = configurator.CreateTerminal(regex, null, true); + + if (Symbols[i] is { } sym) + sym.DebugName = (string)part; // Set debug name to unescaped string, so it's easy on the eyes. 
} else - { - this.symbols[i] = (ISymbol)symbols[i]; - } + Symbols[i] = (ISymbol?)symbols[i]; + ++i; } } - public void SetReduceFunction(Func action) - { - // This creates a little lambda that ignores the exception - ReduceAction = (e, f) => action(f); - } + public void SetReduceFunction(Func[], T> action) => ReduceAction = (e, f) => action(f);// This creates a little lambda that ignores the exception - public void SetReduceToFirst() - { - SetReduceFunction(f => f[0]); - } + public void SetReduceFunction(Func action) => SetReduceFunction(t => action(t.Select(t => t.SymbolValue).ToArray())); - public void SetReduceToIndex(int index) - { - SetReduceFunction(f => f[index]); - } + public void SetReduceToFirst() => SetReduceFunction(f => f[0]); - public void SetPrecedence(IPrecedenceGroup precedenceGroup) - { - ContextPrecedence = precedenceGroup; - } + public void SetReduceToIndex(int index) => SetReduceFunction(f => f[index]); + + public void SetPrecedence(IPrecedenceGroup precedenceGroup) => ContextPrecedence = precedenceGroup; + + public void SetErrorFunction(Func[], T> errorHandler) => ReduceAction = errorHandler; - public void SetErrorFunction(Func errorHandler) + public override string ToString() { - ReduceAction = errorHandler; + string? to_string(ISymbol? s) => s is ITerminal ? 
$"'{s.DebugName}'" : s?.DebugName; + + return $"{string.Join(" ", Symbols.Select(to_string))} --> {to_string(ResultSymbol)}"; } } - - public override string ToString() - { - return string.Format("{0} =>", DebugName); - } } } \ No newline at end of file diff --git a/Piglet/Parser/Configuration/ParserConfigurationException.cs b/Piglet/Parser/Configuration/ParserConfigurationException.cs index 6258b71..d7e183b 100644 --- a/Piglet/Parser/Configuration/ParserConfigurationException.cs +++ b/Piglet/Parser/Configuration/ParserConfigurationException.cs @@ -5,7 +5,8 @@ namespace Piglet.Parser.Configuration /// /// This exception is thrown for illegal parser configurations /// - public class ParserConfigurationException : Exception + public class ParserConfigurationException + : Exception { /// /// Construct a new parser configuration exception diff --git a/Piglet/Parser/Configuration/ParserConfigurator.cs b/Piglet/Parser/Configuration/ParserConfigurator.cs index c74947d..6c9a30f 100644 --- a/Piglet/Parser/Configuration/ParserConfigurator.cs +++ b/Piglet/Parser/Configuration/ParserConfigurator.cs @@ -1,37 +1,61 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Linq; -using Piglet.Lexer; +using System; + using Piglet.Lexer.Configuration; using Piglet.Parser.Construction; +using Piglet.Lexer; namespace Piglet.Parser.Configuration { - internal class ParserConfigurator : IParserConfigurator, IGrammar + internal sealed class ParserConfigurator + : IParserConfigurator + , IGrammar { - private NonTerminal startSymbol; - private readonly List> nonTerminals; - private readonly LinkedList> terminals; + private NonTerminal? 
_startSymbol; + private readonly List> _nonTerminals; + private readonly LinkedList> _terminals; private readonly ILexerSettings lexerSettings; - private readonly List terminalPrecedences; - private int currentPrecedence; + private readonly List _terminalPrecedences; + private int _currentPrecedence; + + + public IProductionRule? Start { get; private set; } + + public ILexerSettings LexerSettings => lexerSettings; - private class TerminalPrecedence : PrecedenceGroup + public ITerminal ErrorToken { get; set; } + + public IEnumerable> ProductionRules => _nonTerminals.SelectMany(nonTerminal => nonTerminal.ProductionRules); + + public IEnumerable> AllSymbols { - public Terminal Terminal { get; set; } + get + { + foreach (Terminal terminal in _terminals) + yield return terminal; + + foreach (NonTerminal nonTerminal in _nonTerminals) + yield return nonTerminal; + } } + public NonTerminal? AcceptSymbol => Start?.ResultSymbol as NonTerminal; + + public Terminal? EndOfInputTerminal { get; set; } + + public ParserConfigurator() { - nonTerminals = new List>(); - terminals = new LinkedList>(); + _nonTerminals = new List>(); + _terminals = new LinkedList>(); lexerSettings = new LexerSettingsImpl(); - terminalPrecedences = new List(); - currentPrecedence = 0; + _terminalPrecedences = new List(); + _currentPrecedence = 0; // Create the Error token. This will create it as terminal 0, but in the end it will be the LAST terminal // second last is EndOfInput. This is sort of hackish and mainly due to the way the lexer is configured. - ErrorToken = CreateTerminal(null, s => default(T)); + ErrorToken = CreateTerminal(null, s => default!); ErrorToken.DebugName = "%ERROR%"; // Set some default settings @@ -40,83 +64,52 @@ public ParserConfigurator() LexerSettings.Ignore = new[] { "\\s+" }; // Ignore all whitespace by default } - private class LexerSettingsImpl : ILexerSettings + public ITerminal CreateTerminal(string? regex, Func? 
onParse = null, bool topPrecedence = false) { - public LexerSettingsImpl() - { - Runtime = LexerRuntime.Tabular; - } + Terminal terminal = _terminals.SingleOrDefault(f => f.Regex == regex); - public bool CreateLexer { get; set; } - public bool EscapeLiterals { get; set; } - public string[] Ignore { get; set; } - public LexerRuntime Runtime { get; set; } - } - - public ITerminal CreateTerminal(string regExp, Func onParse = null, bool topPrecedence = false) - { - Terminal terminal = terminals.SingleOrDefault(f => f.RegExp == regExp); - if (terminal != null && regExp != null) + if (terminal is { } && regex is { }) { if (terminal.OnParse != (onParse??Terminal.DefaultFunc)) - throw new ParserConfigurationException( - "Redefinition of terminal uses the same regex but different onParse action"); + throw new ParserConfigurationException("Redefinition of terminal uses the same regex but different onParse action"); } else { - terminal = new Terminal(regExp, onParse); - if (topPrecedence) - { - terminals.AddFirst(terminal); - } - else - { - terminals.AddLast(terminal); - } + terminal = new Terminal(regex, onParse); + + if (topPrecedence) + _terminals.AddFirst(terminal); + else + _terminals.AddLast(terminal); } + return terminal; } public INonTerminal CreateNonTerminal() { - var nonTerminal = new NonTerminal(this); - nonTerminals.Add(nonTerminal); + NonTerminal nonTerminal = new NonTerminal(this); + + _nonTerminals.Add(nonTerminal); - if (startSymbol == null) - { + if (_startSymbol == null) // First symbol to be created is the start symbol SetStartSymbol(nonTerminal); - } - return nonTerminal; - } - public ILexerSettings LexerSettings - { - get { return lexerSettings; } + return nonTerminal; } - public ITerminal ErrorToken { get; set; } + public IPrecedenceGroup NonAssociative(params ITerminal[] symbols) => SetSymbolAssociativity(symbols, AssociativityDirection.NonAssociative); - public IPrecedenceGroup NonAssociative(params ITerminal[] symbols) - { - return 
SetSymbolAssociativity(symbols, AssociativityDirection.NonAssociative); - } + public IPrecedenceGroup RightAssociative(params ITerminal[] symbols) => SetSymbolAssociativity(symbols, AssociativityDirection.Right); - public IPrecedenceGroup RightAssociative(params ITerminal[] symbols) - { - return SetSymbolAssociativity(symbols, AssociativityDirection.Right); - } - - public IPrecedenceGroup LeftAssociative(params ITerminal[] symbols) - { - return SetSymbolAssociativity(symbols, AssociativityDirection.Left); - } + public IPrecedenceGroup LeftAssociative(params ITerminal[] symbols) => SetSymbolAssociativity(symbols, AssociativityDirection.Left); private IPrecedenceGroup SetSymbolAssociativity(IEnumerable> symbols, AssociativityDirection associativityDirection) { - foreach (var terminal in symbols.OfType>()) + foreach (Terminal terminal in symbols.OfType>()) { - if (terminalPrecedences.Any( f => f.Terminal == terminal)) + if (_terminalPrecedences.Any( f => f.Terminal == terminal)) { // This terminal is defined multiple times throw new ParserConfigurationException( @@ -124,73 +117,65 @@ private IPrecedenceGroup SetSymbolAssociativity(IEnumerable> symbol terminal.DebugName)); } - terminalPrecedences.Add(new TerminalPrecedence - { - Associativity = associativityDirection, - Terminal = terminal, - Precedence = currentPrecedence - }); + _terminalPrecedences.Add(new TerminalPrecedence + { + Associativity = associativityDirection, + Terminal = terminal, + Precedence = _currentPrecedence + }); } - var group = new PrecedenceGroup { Precedence = currentPrecedence }; - ++currentPrecedence; + PrecedenceGroup group = new PrecedenceGroup { Precedence = _currentPrecedence }; + + ++_currentPrecedence; return group; } - public void SetStartSymbol(INonTerminal start) - { - startSymbol = (NonTerminal) start; - } + public void SetStartSymbol(INonTerminal start) => _startSymbol = (NonTerminal)start; public void AugmentGrammar() { // First we need to augment the grammar with a start rule and 
a new start symbol // Create the derived start symbol - var augmentedStart = (NonTerminal)CreateNonTerminal(); // Unfortunate cast... + NonTerminal augmentedStart = (NonTerminal)CreateNonTerminal(); // Unfortunate cast... // Use the start symbols debug name with a ' in front to indicate the augmented symbol. - augmentedStart.DebugName = "'" + startSymbol.DebugName; + augmentedStart.DebugName = "'" + _startSymbol.DebugName; // Create a single production - augmentedStart.AddProduction(startSymbol); // This production is never reduced, parser accepts when its about to reduce. No reduce action. + augmentedStart.AddProduction(_startSymbol); // This production is never reduced, parser accepts when its about to reduce. No reduce action. Start = augmentedStart.ProductionRules.First(); // There's only one production. // Make sure all the terminals are registered. // This becomes neccessary since the user can configure the parser using only strings. // Since the nonterminal used for that does not carry a back-reference to the configurator, // we do it this way. - // TODO: Does the terminals.AddLast ever get called? This looks like dead code to me, apart from the sanity - // TODO: check for redefinition. Which even that gets done someplace else. - foreach (var nonTerminal in nonTerminals) - { - foreach (var terminal in nonTerminal.ProductionRules.SelectMany(f => f.Symbols).OfType>()) + // TODO: Does the terminals.AddLast ever get called? This looks like dead code to me, apart from the sanity + // TODO: check for redefinition. Which even that gets done someplace else. 
+ foreach (NonTerminal nonTerminal in _nonTerminals) + foreach (Terminal terminal in nonTerminal.ProductionRules.SelectMany(f => f.Symbols).OfType>()) { - var oldTerminal = terminals.SingleOrDefault(f => f.RegExp == terminal.RegExp); + Terminal oldTerminal = _terminals.SingleOrDefault(f => f.Regex == terminal.Regex); if (oldTerminal != null) { if (oldTerminal.OnParse != terminal.OnParse) - { throw new ParserConfigurationException( "Multiply defined terminal has more than one OnParse action"); - } } else - { - terminals.AddLast(terminal); - } + _terminals.AddLast(terminal); } - } // Add the end of input symbol - EndOfInputTerminal = (Terminal) CreateTerminal(null, s => default(T)); + EndOfInputTerminal = (Terminal)CreateTerminal(null, s => default); EndOfInputTerminal.DebugName = "%EOF%"; // Move the error symbol to the end of the list // Hackish I know, but it guarantees that the ErrorToken is always created and that 0 -> n-2 are reserved // for the REAL symbols in the grammar. - terminals.Remove((Terminal) ErrorToken); - terminals.AddLast((Terminal) ErrorToken); + _terminals.Remove((Terminal) ErrorToken); + _terminals.AddLast((Terminal) ErrorToken); // Assign all tokens in the grammar token numbers! AssignTokenNumbers(); @@ -198,76 +183,49 @@ public void AugmentGrammar() // This class is now a valid implementation of IGrammar, ready to use. } - public ILexer CreateLexer() - { - // User wants a default lexer, great. Use the lexer from grammar factory - // to fix him up - return LexerFactory.ConfigureFromGrammar(this, LexerSettings); - } + // User wants a default lexer, great. Use the lexer from grammar factory to fix him up + public ILexer CreateLexer() => LexerFactory.ConfigureFromGrammar(this, LexerSettings); public IParser CreateParser() { - if (Start == null) - { - // User has forgotten to augment the grammar. Lets help him out and do it - // for him + // User has forgotten to augment the grammar. 
Lets help him out and do it for him + if (Start is null) AugmentGrammar(); - } - var parser = new ParserBuilder(this).CreateParser(); + IParser parser = new ParserBuilder(this).CreateParser(); - // If our lexer settings says that we are supposed to create a lexer, do so now and assign - // the lexer to the created parser. + // If our lexer settings says that we are supposed to create a lexer, do so now and assign the lexer to the created parser. if (LexerSettings.CreateLexer) - { parser.Lexer = CreateLexer(); - } return parser; } - public IProductionRule Start { get; private set; } + public IPrecedenceGroup GetPrecedence(ITerminal terminal) => _terminalPrecedences.FirstOrDefault(f => f.Terminal == terminal); - public IEnumerable> ProductionRules - { - get { return nonTerminals.SelectMany(nonTerminal => nonTerminal.ProductionRules); } - } - - public IEnumerable> AllSymbols + private void AssignTokenNumbers() { - get - { - foreach (var terminal in terminals) - { - yield return terminal; - } - - foreach (var nonTerminal in nonTerminals) - { - yield return nonTerminal; - } - } - } + int t = 0; - public NonTerminal AcceptSymbol - { - get { return (NonTerminal)Start.ResultSymbol; } + foreach (ISymbol symbol in AllSymbols) + ((Symbol)symbol).TokenNumber = t++; } - public Terminal EndOfInputTerminal { get; set; } - public IPrecedenceGroup GetPrecedence(ITerminal terminal) + private sealed class TerminalPrecedence + : PrecedenceGroup { - return terminalPrecedences.FirstOrDefault(f => f.Terminal == terminal); + public Terminal? 
Terminal { get; set; } } - private void AssignTokenNumbers() + private sealed class LexerSettingsImpl + : ILexerSettings { - int t = 0; - foreach (var symbol in AllSymbols) - { - ((Symbol)symbol).TokenNumber = t++; - } + public bool CreateLexer { get; set; } + public bool EscapeLiterals { get; set; } + public string[] Ignore { get; set; } = Array.Empty(); + public bool IgnoreCase { get; set; } + public LexerRuntime Runtime { get; set; } = LexerRuntime.Tabular; } } } diff --git a/Piglet/Parser/Configuration/Symbol.cs b/Piglet/Parser/Configuration/Symbol.cs index 61e54c3..f08e019 100644 --- a/Piglet/Parser/Configuration/Symbol.cs +++ b/Piglet/Parser/Configuration/Symbol.cs @@ -1,8 +1,9 @@ namespace Piglet.Parser.Configuration { - internal class Symbol : ISymbol + internal class Symbol + : ISymbol { - public string DebugName { get; set; } + public string? DebugName { get; set; } public int TokenNumber { get; set; } } } diff --git a/Piglet/Parser/Configuration/Terminal.cs b/Piglet/Parser/Configuration/Terminal.cs index 24d9d03..f52ed16 100644 --- a/Piglet/Parser/Configuration/Terminal.cs +++ b/Piglet/Parser/Configuration/Terminal.cs @@ -2,27 +2,22 @@ namespace Piglet.Parser.Configuration { - internal class Terminal : Symbol, ITerminal + internal sealed class Terminal + : Symbol + , ITerminal { - public static readonly Func DefaultFunc = f => default(T); - public string RegExp { get; private set; } + public string? Regex { get; private set; } public Func OnParse { get; private set; } - public Terminal(string regExp, Func onParse) - { - if (onParse == null) - { - onParse = DefaultFunc; - } + public static readonly Func DefaultFunc = f => default; - OnParse = onParse; - RegExp = regExp; - DebugName = RegExp; - } - public override string ToString() + public Terminal(string? regex, Func? onParse) { - return string.Format("{0}{{{1}}} - {2}", DebugName, RegExp, OnParse); + OnParse = onParse ?? 
DefaultFunc; + Regex = DebugName = regex; } + + public override string ToString() => $"{DebugName} {{{Regex}}}"; } } \ No newline at end of file diff --git a/Piglet/Parser/Construction/AmbiguousGrammarException.cs b/Piglet/Parser/Construction/AmbiguousGrammarException.cs index a2c72d8..5bcf684 100644 --- a/Piglet/Parser/Construction/AmbiguousGrammarException.cs +++ b/Piglet/Parser/Construction/AmbiguousGrammarException.cs @@ -5,7 +5,8 @@ namespace Piglet.Parser.Construction /// /// Base class for exceptions thrown by the parser generator for ambiguous grammars. /// - public class AmbiguousGrammarException : ParserConfigurationException + public class AmbiguousGrammarException + : ParserConfigurationException { internal AmbiguousGrammarException(string message) : base (message) diff --git a/Piglet/Parser/Construction/Debug/DotNotation.cs b/Piglet/Parser/Construction/Debug/DotNotation.cs index b989a02..5dddbe3 100644 --- a/Piglet/Parser/Construction/Debug/DotNotation.cs +++ b/Piglet/Parser/Construction/Debug/DotNotation.cs @@ -1,6 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; +using System.Collections.Generic; using System.Text; namespace Piglet.Parser.Construction.Debug @@ -9,16 +7,11 @@ internal static class DotNotation { internal static string AsDotNotation(this IEnumerable.GotoSetTransition> transitions, List> itemSets) { - var graph = new StringBuilder(); + StringBuilder graph = new StringBuilder(); graph.Append("digraph goto {"); - foreach (var transition in transitions) - { - graph.Append(string.Format("\t\"I{0}\" -> \"I{1}\" [label=\"{2}\"]\n", - itemSets.IndexOf(transition.From), - itemSets.IndexOf(transition.To), - (transition.OnSymbol.DebugName??"").Replace("\\", "\\\\").Replace("\"", "\\\""))); - } + foreach (ParserBuilder.GotoSetTransition transition in transitions) + graph.Append($"\t\"I{itemSets.IndexOf(transition.From)}\" -> \"I{itemSets.IndexOf(transition.To)}\" [label=\"{(transition.OnSymbol.DebugName ?? 
"").Replace("\\", "\\\\").Replace("\"", "\\\"")}\"]\n"); graph.Append("}"); diff --git a/Piglet/Parser/Construction/Debug/ParseTableToString.cs b/Piglet/Parser/Construction/Debug/ParseTableToString.cs index f3d58d6..b66af15 100644 --- a/Piglet/Parser/Construction/Debug/ParseTableToString.cs +++ b/Piglet/Parser/Construction/Debug/ParseTableToString.cs @@ -1,5 +1,6 @@ using System.Linq; using System.Text; + using Piglet.Parser.Configuration; namespace Piglet.Parser.Construction.Debug @@ -11,55 +12,53 @@ internal static string ToDebugString(this IParseTable table, IGrammar g int numTokens = grammar.AllSymbols.Count() - 1; int numTerminals = grammar.AllSymbols.OfType>().Count(); - var formatString = new StringBuilder("{0,8}|"); + StringBuilder formatString = new StringBuilder("{0,8}|"); + for (int i = 0; i < numTokens; ++i) { if (i == numTerminals) formatString.Append("|"); // Extra bar to separate actions and gotos + formatString.Append("|{" + (i + 1) + ",8}"); } + formatString.Append("|\n"); + string format = formatString.ToString(); - var sb = new StringBuilder(); - sb.Append(string.Format(format, new[] { "STATE" }.Concat(grammar.AllSymbols.Select(f => f.DebugName)).ToArray())); - for (int i = 0; i f.DebugName ?? "")).ToArray())); + + for (int i = 0; i < numStates; ++i) { object[] formatParams = new[] { i.ToString() }.Concat(grammar.AllSymbols.OfType>().Select(f => { - var actionValue = table.Action[i, f.TokenNumber]; + int actionValue = table.Action?[i, f.TokenNumber] ?? 
short.MinValue; + if (actionValue == short.MaxValue) - { return "acc"; - } - - if (actionValue == short.MinValue) - { + else if (actionValue == short.MinValue) return ""; - } - - if (actionValue < 0) - { + else if (actionValue < 0) return "r" + -(actionValue + 1); - } - - return "s" + actionValue; - }).Concat(grammar.AllSymbols.OfType>().Where(f => f.ProductionRules.All(p => p.ResultSymbol != grammar.AcceptSymbol)).Select(f => table.Goto[i, f.TokenNumber - numTerminals] == - short.MinValue + else + return "s" + actionValue; + }).Concat(grammar.AllSymbols + .OfType>() + .Where(f => f.ProductionRules.All(p => p.ResultSymbol != grammar.AcceptSymbol)) + .Select(f => table.Goto[i, f.TokenNumber - numTerminals] == short.MinValue ? "" : table.Goto[i, f.TokenNumber - numTerminals].ToString()))).ToArray(); - + // If formatparams is all empty, we have run out of table to process. - // This is perhaps not the best way to determine if the table has ended but the grammar - // has no idea of the number of states, and I'd rather not mess up the interface - // with methods to get the number of states. + // This is perhaps not the best way to determine if the table has ended but the grammar has no idea of the + // number of states, and I'd rather not mess up the interface with methods to get the number of states. if (formatParams.Distinct().Count() == 2) - { - // All empty strings and one state. - break; - } + break; // All empty strings and one state. 
sb.Append(string.Format(format, formatParams)); } + return sb.ToString(); } } diff --git a/Piglet/Parser/Construction/GotoTable.cs b/Piglet/Parser/Construction/GotoTable.cs index 13d9761..3ef1a03 100644 --- a/Piglet/Parser/Construction/GotoTable.cs +++ b/Piglet/Parser/Construction/GotoTable.cs @@ -1,12 +1,13 @@ -using System; -using System.Collections; using System.Collections.Generic; using System.Linq; +using System; + using Piglet.Common; namespace Piglet.Parser.Construction { - internal class GotoTable : ITable2D + internal sealed class GotoTable + : ITable2D { /// /// Only for input to the constructor @@ -18,28 +19,28 @@ public struct GotoTableValue public int NewState; }; - private readonly short[] stateDictionary; - private readonly short[] gotoValues; + private readonly short[] _stateDictionary; + private readonly short[] _gotoValues; + public GotoTable(IList gotos) { // Gather the most common gotos for each token. - var maxToken = gotos.Max(f => f.Token) + 1; - + int maxToken = gotos.Max(f => f.Token) + 1; + // Get the most common gotos and store them in the start - var defaultGotos = GetMostCommonGotos(gotos, maxToken); + short[] defaultGotos = GetMostCommonGotos(gotos, maxToken); - // Iterate through the states, and find out where the default GOTOs are not applicable - // for those states, store an offset - stateDictionary = new short[gotos.Max(f => f.State) + 1]; // Need not store more than nStates+1 of the maximum referenced state + // Iterate through the states, and find out where the default GOTOs are not applicable for those states, store an offset + _stateDictionary = new short[gotos.Max(f => f.State) + 1]; // Need not store more than nStates+1 of the maximum referenced state // Holds the gotos for a given state, allocated outside of loop for performance reasons. 
- var stateGotos = new short[maxToken]; + short[] stateGotos = new short[maxToken]; // Stategotos now holds only 0, which is what we want (every state points to defaultGotos) - var offsets = new List(defaultGotos); // The offsets is where we will store the default gotos in the end + List offsets = new List(defaultGotos); // The offsets is where we will store the default gotos in the end - foreach ( var state in gotos.Select(f => f.State).Distinct()) + foreach (int state in gotos.Select(f => f.State).Distinct()) { // Assemble the goto list @@ -49,17 +50,16 @@ public GotoTable(IList gotos) // For each gotoitem, set the stateGoto appropritately. int state1 = state; - var gotosForState = gotos.Where(f => f.State == state1).ToList(); - foreach (var gotoItem in gotosForState) - { - stateGotos[gotoItem.Token] = (short) gotoItem.NewState; - } + List gotosForState = gotos.Where(f => f.State == state1).ToList(); + + foreach (GotoTableValue gotoItem in gotosForState) + stateGotos[gotoItem.Token] = (short)gotoItem.NewState; // Compare the state gotos to the default gotos. If they are the same, don't change a thing. int firstMisMatchIndex = -1; int lastMisMatchIndex = -1; + for (int i = 0; i < defaultGotos.Length; ++i) - { if (stateGotos[i] != defaultGotos[i]) { // Mismatch, we will need to create things in the gotoValues table @@ -68,13 +68,13 @@ public GotoTable(IList gotos) firstMisMatchIndex = gotosForState.Min(f => f.Token); lastMisMatchIndex = gotosForState.Max(f => f.Token); } - } // If we have a mismatch we need to find a match for the sublist in question. if (firstMisMatchIndex != -1) { - var sublist = stateGotos.Skip(firstMisMatchIndex).Take(lastMisMatchIndex - firstMisMatchIndex + 1).ToList(); - var offsetIndex = offsets.IndexOf(sublist); + List sublist = stateGotos.Skip(firstMisMatchIndex).Take(lastMisMatchIndex - firstMisMatchIndex + 1).ToList(); + int offsetIndex = offsets.IndexOf(sublist); + if (offsetIndex == -1) { // Not found. 
Add entire sublist to the end @@ -84,33 +84,52 @@ public GotoTable(IList gotos) // Set the offset index. This is offsetted by the first mismatch since those tokens will never be called, so they // can be whatever. We're not using the entire list to look for the submatch. - stateDictionary[state] = (short)(offsetIndex - firstMisMatchIndex); + _stateDictionary[state] = (short)(offsetIndex - firstMisMatchIndex); } } // Remove the list and condense into array for fast use once parsing starts - gotoValues = offsets.ToArray(); + _gotoValues = offsets.ToArray(); } private static short[] GetMostCommonGotos(IEnumerable gotos, int maxToken) { - var defaultGotos = new short[maxToken]; - var gotoCounts = new Dictionary, int>(); - foreach (var g in gotos) + Dictionary<(int token, int state), int> gotoCounts = new Dictionary<(int token, int state), int>(); + short[] defaultGotos = new short[maxToken]; + + foreach (GotoTableValue g in gotos) { - var t = new Tuple(g.Token, g.NewState); + (int, int) t = (g.Token, g.NewState); if (!gotoCounts.ContainsKey(t)) gotoCounts.Add(t, 0); - gotoCounts[t] = gotoCounts[t] + 1; + + ++gotoCounts[t]; } + List unassigned = new List(); + // For every token in the grammar, store the most stored count as the default goto for (int t = 0; t < maxToken; ++t) { - var mostCommonNewState = gotoCounts.Where(f => f.Key.Item1 == t).OrderBy(f => -f.Value).Select(f => f.Key.Item2); - defaultGotos[t] = (short) mostCommonNewState.First(); + int[] def = (from f in gotoCounts + where f.Key.token == t + orderby -f.Value + select f.Key.state).ToArray(); + + if (def.Length == 0) + unassigned.Add(t); + else + defaultGotos[t] = (short)def[0]; } + + foreach (int t in unassigned) + defaultGotos[t] = (from f in gotoCounts + let i2 = (short)f.Key.state + where !defaultGotos.Contains(i2) + orderby -f.Value + select i2).FirstOrDefault(); + return defaultGotos; } @@ -120,16 +139,17 @@ private static short[] GetMostCommonGotos(IEnumerable gotos, int { // This check is really 
unneccessary since the parser will never access outside of the legal state list // but for now the debug printer will. So that is why we check for the state bounds - if (state >= stateDictionary.Length) + if (state >= _stateDictionary.Length) return short.MinValue; // Nothing to see here. - + // Index into goto values. - var offsetIndex = stateDictionary[state] + input; + int offsetIndex = _stateDictionary[state] + input; // Also an unnecessary check if it wasn't for the debugging feature - if (offsetIndex >= gotoValues.Length) + if (offsetIndex >= _gotoValues.Length) return short.MinValue; - return gotoValues[offsetIndex]; + + return _gotoValues[offsetIndex]; } } } diff --git a/Piglet/Parser/Construction/IParseTable.cs b/Piglet/Parser/Construction/IParseTable.cs index 48955ad..bed922a 100644 --- a/Piglet/Parser/Construction/IParseTable.cs +++ b/Piglet/Parser/Construction/IParseTable.cs @@ -11,17 +11,17 @@ public interface IParseTable /// /// Get the action table for this parser /// - ITable2D Action { get; } + ITable2D? Action { get; } /// /// Get the goto table for this parser /// - ITable2D Goto { get; } + ITable2D? Goto { get; } /// /// Get the reduction rules /// - IReductionRule[] ReductionRules { get; set; } + IReductionRule[]? ReductionRules { get; set; } /// /// Total number of states used by the parser diff --git a/Piglet/Parser/Construction/IProductionRule.cs b/Piglet/Parser/Construction/IProductionRule.cs index 9410e77..0bc07c6 100644 --- a/Piglet/Parser/Construction/IProductionRule.cs +++ b/Piglet/Parser/Construction/IProductionRule.cs @@ -1,13 +1,15 @@ using System; + using Piglet.Parser.Configuration; +using Piglet.Lexer.Runtime; namespace Piglet.Parser.Construction { internal interface IProductionRule { - ISymbol[] Symbols { get; } + ISymbol?[] Symbols { get; } ISymbol ResultSymbol { get; } - Func ReduceAction { get; } - IPrecedenceGroup ContextPrecedence { get; } + Func[], T>? ReduceAction { get; } + IPrecedenceGroup? 
ContextPrecedence { get; } } } \ No newline at end of file diff --git a/Piglet/Parser/Construction/IReductionRule.cs b/Piglet/Parser/Construction/IReductionRule.cs index 5b1e9e8..351ad47 100644 --- a/Piglet/Parser/Construction/IReductionRule.cs +++ b/Piglet/Parser/Construction/IReductionRule.cs @@ -1,27 +1,34 @@ using System; +using Piglet.Lexer.Runtime; +using Piglet.Parser.Configuration; + namespace Piglet.Parser.Construction { /// - /// A rule which can be applied on a reduction + /// A rule which can be applied on a reduction. /// /// Parser value type public interface IReductionRule { /// - /// Number of tokens to pop from the parsing stack when rule is applied + /// The non-terminal symbol, to which the current rule will be reduced. + /// + INonTerminal ReductionSymbol { get; } + + /// + /// Number of tokens to pop from the parsing stack when rule is applied. /// int NumTokensToPop { get; } /// - /// The token number of the resulting symbol to push on the parse stack + /// The token number of the resulting symbol to push on the parse stack. /// int TokenToPush { get; } /// - /// The reduction function to apply. This may also handle an exception in the case - /// of error recovery. The exception parameter will be null if no error has occurred. + /// The reduction function to apply. This may also handle an exception in the case of error recovery. The exception parameter will be null if no error has occurred. 
/// - Func OnReduce { get; } + Func[], T> OnReduce { get; } } } \ No newline at end of file diff --git a/Piglet/Parser/Construction/LRParseTable.cs b/Piglet/Parser/Construction/LRParseTable.cs index de3fb0c..3b4e0c0 100644 --- a/Piglet/Parser/Construction/LRParseTable.cs +++ b/Piglet/Parser/Construction/LRParseTable.cs @@ -1,32 +1,20 @@ -using System.Collections.Generic; using Piglet.Common; namespace Piglet.Parser.Construction { - internal class LRParseTable : IParseTable + internal sealed class LRParseTable + : IParseTable { - public ITable2D Action { get; internal set; } - public ITable2D Goto { get; internal set; } - public IReductionRule[] ReductionRules { get; set; } - + public ITable2D? Action { get; internal set; } + public ITable2D? Goto { get; internal set; } + public IReductionRule[]? ReductionRules { get; set; } public int StateCount { get; set; } - public static short Shift(int stateToChangeTo) - { - // Shift is positive integers - return (short) stateToChangeTo; - } - - public static short Reduce(int reductionRule) - { - // Reduce is negative integers - // with -1 to not conflict with a possible shift to state 0 - return (short)-(reductionRule + 1); - } - public static short Accept() - { - return short.MaxValue; // Max means accept - } + public static short Shift(int stateToChangeTo) => (short)stateToChangeTo; // Shift is positive integers + + public static short Reduce(int reductionRule) => (short)-(reductionRule + 1);// Reduce is negative integers with -1 to not conflict with a possible shift to state 0 + + public static short Accept() => short.MaxValue; // Max means accept } } \ No newline at end of file diff --git a/Piglet/Parser/Construction/Lr0Item.cs b/Piglet/Parser/Construction/Lr0Item.cs index d0861ee..65dfc31 100644 --- a/Piglet/Parser/Construction/Lr0Item.cs +++ b/Piglet/Parser/Construction/Lr0Item.cs @@ -8,14 +8,8 @@ internal class Lr0Item public IProductionRule ProductionRule { get; private set; } public int DotLocation { get; private 
set; } - public ISymbol SymbolRightOfDot - { - get { - if (DotLocation < ProductionRule.Symbols.Length) - return ProductionRule.Symbols[DotLocation]; - return null; - } - } + public ISymbol? SymbolRightOfDot => DotLocation < (ProductionRule.Symbols?.Length ?? 0) ? ProductionRule.Symbols?[DotLocation] : null; + public Lr0Item(IProductionRule productionRule, int dotLocation) { @@ -23,27 +17,30 @@ public Lr0Item(IProductionRule productionRule, int dotLocation) ProductionRule = productionRule; } - public override string ToString() { - var sb = new StringBuilder(); + StringBuilder sb = new StringBuilder(); + sb.Append(ProductionRule.ResultSymbol.DebugName); sb.Append(" -> "); + bool dotAdded = false; - for (int i = 0; i < ProductionRule.Symbols.Length; ++i ) + + for (int i = 0; i < (ProductionRule.Symbols?.Length ?? 0); ++i ) { if (i == DotLocation) { sb.Append("• "); dotAdded = true; } - sb.Append(ProductionRule.Symbols[i].DebugName); + + sb.Append(ProductionRule.Symbols?[i]?.DebugName); sb.Append(" "); } + if (!dotAdded) - { - sb.Append("•"); - } + sb.Append("•"); + return sb.ToString(); } } diff --git a/Piglet/Parser/Construction/Lr1ItemSet.cs b/Piglet/Parser/Construction/Lr1ItemSet.cs index 212581e..d82431c 100644 --- a/Piglet/Parser/Construction/Lr1ItemSet.cs +++ b/Piglet/Parser/Construction/Lr1ItemSet.cs @@ -8,30 +8,21 @@ internal class Lr1ItemSet : IEnumerable> { public List> Items { get; private set; } - public Lr1ItemSet() - { - Items = new List>(); - } + public Lr1ItemSet() => Items = new List>(); - public Lr1ItemSet(IEnumerable> lr1Items) - { - Items = new List>(lr1Items); - } + public Lr1ItemSet(IEnumerable> lr1Items) => Items = new List>(lr1Items); - public override string ToString() - { - return string.Join("\n", Items); - } + public override string ToString() => string.Join("\n", Items); public bool Add(Lr1Item item) { // See if there already exists an item with the same core - var oldItem = Items.FirstOrDefault(f => f.ProductionRule == 
item.ProductionRule && f.DotLocation == item.DotLocation); + Lr1Item oldItem = Items.FirstOrDefault(f => f.ProductionRule == item.ProductionRule && f.DotLocation == item.DotLocation); if (oldItem != null) { // There might be lookaheads that needs adding bool addedLookahead = false; - foreach (var lookahead in item.Lookaheads) + foreach (Configuration.Terminal lookahead in item.Lookaheads) { addedLookahead |= oldItem.Lookaheads.Add(lookahead); } @@ -42,15 +33,9 @@ public bool Add(Lr1Item item) return true; } - public IEnumerator> GetEnumerator() - { - return Items.GetEnumerator(); - } + public IEnumerator> GetEnumerator() => Items.GetEnumerator(); - IEnumerator IEnumerable.GetEnumerator() - { - return Items.GetEnumerator(); - } + IEnumerator IEnumerable.GetEnumerator() => Items.GetEnumerator(); public Lr1Item this[int index] { @@ -70,9 +55,9 @@ public bool CoreEquals(Lr1ItemSet other) public void MergeLookaheads(Lr1ItemSet other) { - foreach (var lr1Item in Items) + foreach (Lr1Item lr1Item in Items) { - var otherRule = other.First(f => f.ProductionRule == lr1Item.ProductionRule && f.DotLocation == lr1Item.DotLocation); + Lr1Item otherRule = other.First(f => f.ProductionRule == lr1Item.ProductionRule && f.DotLocation == lr1Item.DotLocation); lr1Item.Lookaheads.UnionWith(otherRule.Lookaheads); } } diff --git a/Piglet/Parser/Construction/ParserBuilder.cs b/Piglet/Parser/Construction/ParserBuilder.cs index 29f1469..7301eb6 100644 --- a/Piglet/Parser/Construction/ParserBuilder.cs +++ b/Piglet/Parser/Construction/ParserBuilder.cs @@ -1,536 +1,460 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using Piglet.Common; -using Piglet.Parser.Configuration; -using Piglet.Parser.Construction.Debug; - -namespace Piglet.Parser.Construction -{ - internal class ParserBuilder - { - private readonly IGrammar grammar; - - // Holds the generated reduction rules, which we'll feed the table at the end of this method - // the second part at least, the other is for 
indexing them while making the table. - private readonly List, ReductionRule>> reductionRules; - - public ParserBuilder(IGrammar grammar) - { - this.grammar = grammar; - this.reductionRules = new List, ReductionRule>>(); - } - - internal sealed class GotoSetTransition - { - public Lr1ItemSet From { get; set; } - public Lr1ItemSet To { get; set; } - public ISymbol OnSymbol { get; set; } - } - - internal IParser CreateParser() - { - // First order of business is to create the canonical list of LR1 states, or at least we are going to go through - // them as we merge the sets together. - // This starts with augmenting the grammar with an accept symbol, then we derive the - // grammar from that - var start = grammar.Start; - - // Get the first and follow sets for all nonterminal symbols - var nullable = CalculateNullable(); - var first = CalculateFirst(nullable); - - // So, we are going to calculate the LR1 closure for the start symbol, which should - // be the augmented accept state of the grammar. - // The closure is all states which are accessible by the dot at the left hand side of the - // item. - var itemSets = new List> - { - Closure(new List> - { - new Lr1Item(start, 0, new HashSet> {grammar.EndOfInputTerminal}) - }, first, nullable) - }; - var gotoSetTransitions = new List(); - - // Repeat until nothing gets added any more - // This is neccessary since we are merging sets as we go, which changes things around. 
- bool added; - do - { - added = false; - for (int i = 0; i < itemSets.Count(); ++i) - { - var itemSet = itemSets[i]; - - foreach (var symbol in grammar.AllSymbols) - { - // Calculate the itemset for by goto for each symbol in the grammar - var gotoSet = Goto(itemSet, symbol); - - // If there is anything found in the set - if (gotoSet.Any()) - { - // Do a closure on the goto set and see if it's already present in the sets of items that we have - // if that is not the case add it to the item set - gotoSet = Closure(gotoSet, first, nullable); - - var oldGotoSet = itemSets.FirstOrDefault(f => f.CoreEquals(gotoSet)); - - if (oldGotoSet == null) - { - // Add goto set to itemsets - itemSets.Add(gotoSet); - - // Add a transition - gotoSetTransitions.Add(new GotoSetTransition - { - From = itemSet, - OnSymbol = symbol, - To = gotoSet - }); - added = true; - } - else - { - // Already found the set - // Merge the lookaheads for all rules - oldGotoSet.MergeLookaheads(gotoSet); - - // Add a transition if it already isn't there - var nt = new GotoSetTransition - { - From = itemSet, - OnSymbol = symbol, - To = oldGotoSet - }; - if (!gotoSetTransitions.Any( - a => a.From == nt.From && a.OnSymbol == nt.OnSymbol && a.To == nt.To)) - { - gotoSetTransitions.Add(nt); - } - } - } - } - } - } while (added); - - LRParseTable parseTable = CreateParseTable(itemSets, gotoSetTransitions); - - // Create a new parser using that parse table and some additional information that needs - // to be available for the runtime parsing to work. - return new LRParser(parseTable, - ((Terminal)grammar.ErrorToken).TokenNumber, - grammar.EndOfInputTerminal.TokenNumber, - grammar.AllSymbols.OfType>().Select(f => f.DebugName).ToArray()); - } - - private ISet> CalculateNullable() - { - // TODO: This is a naïve implementation that keeps iterating until the set becomes stable - // TODO: This could probably be optimized. 
- - // A nullable symbol is a symbol that may consist of only epsilon transitions - var nullable = new HashSet>(); - - bool nullableSetChanged; - - do - { - nullableSetChanged = false; - foreach (var nonTerminal in grammar.AllSymbols.OfType>()) - { - // No need to reevaluate things we know to be nullable. - if (nullable.Contains(nonTerminal)) - continue; - - foreach (var production in nonTerminal.ProductionRules) - { - // If this production is nullable, add the nonterminal to the set. - - // Iterate over symbols. If we find a terminal it is never nullable - // if we find a nonterminal continue iterating only if this terminal itself is not nullable. - // By this rule, empty production rules will always return nullable true - bool symbolIsNullable = production.Symbols.All(symbol => !(symbol is Terminal) && nullable.Contains((NonTerminal)symbol)); - - if (symbolIsNullable) - { - nullableSetChanged |= nullable.Add(nonTerminal); - } - } - } - } while (nullableSetChanged); - - return nullable; - } - - private LRParseTable CreateParseTable(List> itemSets, List gotoSetTransitions) - { - var table = new LRParseTable(); - - // Create a temporary uncompressed action table. This is what we will use to create - // the compressed action table later on. This could probably be improved upon to save - // memory if needed. 
- var uncompressedActionTable = new short[itemSets.Count, grammar.AllSymbols.OfType>().Count()]; - for (int i = 0; i < itemSets.Count(); ++i) - for (int j = 0; j < grammar.AllSymbols.OfType>().Count(); ++j) - uncompressedActionTable[i, j] = short.MinValue; - - int firstNonTerminalTokenNumber = grammar.AllSymbols.OfType>().First().TokenNumber; - var gotos = new List(); - - for (int i = 0; i < itemSets.Count(); ++i) - { - var itemSet = itemSets[i]; - foreach (var lr1Item in itemSet) - { - // Fill the action table first - - // If the next symbol in the LR0 item is a terminal (symbol - // found after the dot, add a SHIFT j IF GOTO(lr0Item, nextSymbol) == j - if (lr1Item.SymbolRightOfDot != null) - { - if (lr1Item.SymbolRightOfDot is Terminal) - { - // Look for a transition in the gotoSetTransitions - // there should always be one. - var transition = gotoSetTransitions.First(t => t.From == itemSet && t.OnSymbol == lr1Item.SymbolRightOfDot); - int transitionIndex = itemSets.IndexOf(transition.To); - int tokenNumber = ((Terminal)lr1Item.SymbolRightOfDot).TokenNumber; - - SetActionTable(uncompressedActionTable, i, tokenNumber, LRParseTable.Shift(transitionIndex)); - } - } - else - { - // The dot is at the end. 
Add reduce action to the parse table for - // all lookaheads for the resulting symbol - // Do NOT do this if the resulting symbol is the start symbol - if (lr1Item.ProductionRule.ResultSymbol != grammar.AcceptSymbol) - { - int numReductionRules = reductionRules.Count(); - int reductionRule = 0; - for (; reductionRule < numReductionRules; ++reductionRule) - { - if (reductionRules[reductionRule].Item1 == lr1Item.ProductionRule) - { - // Found it, it's already created - break; - } - } - - if (numReductionRules == reductionRule) - { - // Need to create a new reduction rule - reductionRules.Add(new Tuple, ReductionRule>(lr1Item.ProductionRule, - new ReductionRule - { - NumTokensToPop = lr1Item.ProductionRule.Symbols.Count(), - OnReduce = lr1Item.ProductionRule.ReduceAction, - TokenToPush = ((Symbol)lr1Item.ProductionRule.ResultSymbol).TokenNumber - firstNonTerminalTokenNumber - })); - } - - foreach (var lookahead in lr1Item.Lookaheads) - { - try - { - SetActionTable(uncompressedActionTable, i, lookahead.TokenNumber, LRParseTable.Reduce(reductionRule)); - } - catch (ReduceReduceConflictException e) - { - // Augment exception with correct symbols for the poor user - e.PreviousReduceSymbol = reductionRules[-(1 + e.PreviousValue)].Item1.ResultSymbol; - e.NewReduceSymbol = reductionRules[reductionRule].Item1.ResultSymbol; - throw; - } - } - } - else - { - // This production rule has the start symbol with the dot at the rightmost end in it, add ACCEPT to action - // for end of input character. 
- SetActionTable(uncompressedActionTable, i, grammar.EndOfInputTerminal.TokenNumber, LRParseTable.Accept()); - } - } - } - - // Fill the goto table with the state IDs of all states that have been originally - // produced by the GOTO operation from this state - foreach (var gotoTransition in gotoSetTransitions.Where(f => f.From == itemSet && f.OnSymbol is NonTerminal)) - { - gotos.Add(new GotoTable.GotoTableValue - { - NewState = itemSets.IndexOf(gotoTransition.To), - State = i, - Token = - ((Symbol) gotoTransition.OnSymbol).TokenNumber - - firstNonTerminalTokenNumber - }); - } - } - - // Move the reduction rules to the table. No need for the impromptu dictionary - // anymore. - table.ReductionRules = reductionRules.Select(f => f.Item2).ToArray(); - table.Action = new CompressedTable(uncompressedActionTable); - table.Goto = new GotoTable(gotos); - table.StateCount = itemSets.Count; - - // Useful point to look at the table, and everything the builder has generated, since after this point the grammar is pretty much destroyed. - //string gotoGraph = gotoSetTransitions.AsDotNotation(itemSets); - //string debugTable = table.ToDebugString(grammar, itemSets.Count); - return table; - } - - private void SetActionTable(short[,] table, int state, int tokenNumber, short value) - { - // This is an error condition, find out what sort of exception it is - short oldValue = table[state, tokenNumber]; - if (oldValue != value && oldValue != short.MinValue) - { - try - { - if (oldValue < 0 && value < 0) - { - // Both values are reduce. Throw a reduce reduce conflict. 
This is not solveable - throw new ReduceReduceConflictException("Grammar contains a reduce reduce conflict"); - } - - int shiftTokenNumber = tokenNumber; - int reduceRuleNumber; - short shiftValue; - short reduceValue; - - if (oldValue < 0) - { - // The old value was a reduce, the new must be a shift - shiftValue = value; - reduceValue = oldValue; - - reduceRuleNumber = -(oldValue + 1); - } - else - { - // TODO: Unsure if this is a real case. The only testcases - // TODO: that end up here are retarded tests which are cyclic in nature. - // TODO: These cases always fail later on anyway due to conflicts. - // The old value was a shift - // the new value must be a reduce - shiftValue = oldValue; - reduceValue = value; - - reduceRuleNumber = -(value + 1); - } - - // Check if these tokens have declared precedences and associativity - // If they do, we might be able to act on this. - Terminal shiftingTerminal = - grammar.AllSymbols.OfType>().First( - f => f.TokenNumber == shiftTokenNumber); - var shiftPrecedence = grammar.GetPrecedence(shiftingTerminal); - - var productionRule = reductionRules[reduceRuleNumber].Item1; - - // If the rule has a context dependent precedence, use that. Otherwise use - // the reduce precedence of the last terminal symbol in the production rules precedence - var reducePrecedence = productionRule.ContextPrecedence?? - grammar.GetPrecedence(productionRule.Symbols.Reverse().OfType>().FirstOrDefault()); - - // If either rule has no precedence this is not a legal course of action. - // TODO: In bison this is apparently cool, it prefers to shift in this case. I don't know why, but this - // TODO: seems like a dangerous course of action to me. 
- if (shiftPrecedence == null || reducePrecedence == null) - { - throw new ShiftReduceConflictException("Grammar contains a shift reduce conflict") - { - ShiftSymbol = shiftingTerminal, - ReduceSymbol = productionRule.ResultSymbol, - }; - } - - if (shiftPrecedence.Precedence < reducePrecedence.Precedence) - { - // Precedence of reduce is higher, choose to reduce - table[state, tokenNumber] = reduceValue; - } - else if (shiftPrecedence.Precedence > reducePrecedence.Precedence) - { - // Shift precedence is higher. Shift - table[state, tokenNumber] = shiftValue; - } - // Both tokens are in the same precedence group! It's now up to the associativity - // The two tokens CANNOT have different associativity, due to how the configuration works - // which throws up if you try to multiple-define the precedence - else if (shiftPrecedence.Associativity == AssociativityDirection.Left) - { - // Prefer reducing - table[state, tokenNumber] = reduceValue; - } - else if (shiftPrecedence.Associativity == AssociativityDirection.Right) - { - // Prefer shifting - table[state, tokenNumber] = shiftValue; - } - else // if (shiftPrecedence.Associativity == AssociativityDirection.NonAssociative) <- this is implied - { - // Unresolveable - throw new ShiftReduceConflictException("Grammar contains a shift reduce conflict (Nonassociative)") - { - ShiftSymbol = shiftingTerminal, - ReduceSymbol = productionRule.ResultSymbol, - }; - } - } - catch (AmbiguousGrammarException ex) - { - // Fill in more information on the error and rethrow the error - ex.StateNumber = state; - ex.TokenNumber = tokenNumber; - ex.PreviousValue = oldValue; - ex.NewValue = value; - throw; - } - } - else - { - table[state, tokenNumber] = value; - } - } - - private TerminalSet CalculateFirst(ISet> nullable) - { - var first = new TerminalSet(grammar); - - // Algorithm is that if a nonterminal has a production that starts with a - // terminal, we add that to the first set. 
If it starts with a nonterminal, we add - // that nonterminals firsts to the known firsts of our nonterminal. - bool addedThings; - do - { - addedThings = false; - - foreach (var symbol in grammar.AllSymbols.OfType>()) - { - foreach (var productionRule in symbol.ProductionRules) - { - foreach (var productionSymbol in productionRule.Symbols) - { - // Terminals are trivial, just add them - if (productionSymbol is Terminal) - { - addedThings |= first.Add(symbol, (Terminal)productionSymbol); - - // This production rule is done now - break; - } - - if (productionSymbol is NonTerminal) - { - var nonTerminal = (NonTerminal)productionSymbol; - // Add everything in FIRST for the given terminal. - foreach (var f in first[nonTerminal]) - { - addedThings |= first.Add(symbol, f); - } - - // Stop iterating if it wasn't nullable - if (!nullable.Contains(nonTerminal)) - { - // Jump out since we've found a non nullable symbol - break; - } - } - } - } - } - } while (addedThings); - - return first; - } - - private Lr1ItemSet Goto(IEnumerable> closures, ISymbol symbol) - { - // Every place there is a symbol to the right of the dot that matches the symbol we are looking for - // add a new Lr1 item that has the dot moved one step to the right. - return new Lr1ItemSet(from lr1Item in closures - where lr1Item.SymbolRightOfDot != null && lr1Item.SymbolRightOfDot == symbol - select new Lr1Item(lr1Item.ProductionRule, lr1Item.DotLocation + 1, lr1Item.Lookaheads)); - } - - private Lr1ItemSet Closure(IEnumerable> items, TerminalSet first, ISet> nullable) - { - // The items themselves are always in their own closure set - var closure = new Lr1ItemSet(); - foreach (var lr1Item in items) - { - closure.Add(lr1Item); - } - - // This needs to be a normal for loop since we add to the underlying collection - // as we go along. 
This avoids investigating the same rule twice - for (int currentItem = 0; currentItem < closure.Count(); ++currentItem) - { - var item = closure[currentItem]; - - ISymbol symbolRightOfDot = item.SymbolRightOfDot; - if (symbolRightOfDot != null) - { - // Generate the lookahead items - var lookaheads = new HashSet>(); - - bool nonNullableFound = false; - for (int i = item.DotLocation + 1; i < item.ProductionRule.Symbols.Length; ++i) - { - var symbol = item.ProductionRule.Symbols[i]; - - // If symbol is terminal, just add it - if (symbol is Terminal) - { - lookaheads.Add((Terminal)symbol); - - // Terminals are not nullable, break out of loop - nonNullableFound = true; - break; - } - - foreach (var terminal in first[(NonTerminal)symbol]) - { - lookaheads.Add(terminal); - } - - if (!nullable.Contains(symbol)) - { - nonNullableFound = true; - break; - } - } - - if (!nonNullableFound) - { - // Add each of the lookahead symbols of the generating rule - // to the new lookahead set - foreach (var lookahead in item.Lookaheads) - { - lookaheads.Add(lookahead); - } - } - - // Create new Lr1 items from all rules where the resulting symbol of the production rule - // matches the symbol that was to the right of the dot. - var newLr1Items = - grammar.ProductionRules.Where(f => f.ResultSymbol == symbolRightOfDot).Select( - f => new Lr1Item(f, 0, lookaheads)); - - foreach (var lr1Item in newLr1Items) - { - closure.Add(lr1Item); - } - } - } - - return closure; - } - } -} - +using System.Collections.Generic; +using System.Linq; +using System; + +using Piglet.Parser.Configuration; +using Piglet.Common; + +namespace Piglet.Parser.Construction +{ + internal sealed class ParserBuilder + { + // Holds the generated reduction rules, which we'll feed the table at the end of this method + // the second part at least, the other is for indexing them while making the table. 
+ private readonly List<(IProductionRule, ReductionRule)> _reductionRules; + private readonly IGrammar _grammar; + + + public ParserBuilder(IGrammar grammar) + { + _grammar = grammar; + _reductionRules = new List<(IProductionRule, ReductionRule)>(); + } + + internal IParser CreateParser() + { + // First order of business is to create the canonical list of LR1 states, or at least we are going to go through them as we merge the sets together. + // This starts with augmenting the grammar with an accept symbol, then we derive the grammar from that + IProductionRule start = _grammar.Start; + + // Get the first and follow sets for all nonterminal symbols + ISet> nullable = CalculateNullable(); + TerminalSet first = CalculateFirst(nullable); + + // So, we are going to calculate the LR1 closure for the start symbol, which should + // be the augmented accept state of the grammar. + // The closure is all states which are accessible by the dot at the left hand side of the + // item. + List> itemSets = new List> + { + Closure(new List> + { + new Lr1Item(start, 0, new HashSet> {_grammar.EndOfInputTerminal}) + }, first, nullable) + }; + List gotoSetTransitions = new List(); + + // Repeat until nothing gets added any more + // This is neccessary since we are merging sets as we go, which changes things around. 
+ bool added; + + do + { + added = false; + + for (int i = 0; i < itemSets.Count(); ++i) + { + Lr1ItemSet itemSet = itemSets[i]; + + foreach (ISymbol symbol in _grammar.AllSymbols) + { + // Calculate the itemset for by goto for each symbol in the grammar + Lr1ItemSet gotoSet = Goto(itemSet, symbol); + + // If there is anything found in the set + if (gotoSet.Any()) + { + // Do a closure on the goto set and see if it's already present in the sets of items that we have + // if that is not the case add it to the item set + gotoSet = Closure(gotoSet, first, nullable); + + Lr1ItemSet oldGotoSet = itemSets.FirstOrDefault(f => f.CoreEquals(gotoSet)); + + if (oldGotoSet is null) + { + itemSets.Add(gotoSet); // Add goto set to itemsets + gotoSetTransitions.Add(new GotoSetTransition(itemSet, gotoSet, symbol)); // Add a transition + + added = true; + } + else + { + // Already found the set + // Merge the lookaheads for all rules + oldGotoSet.MergeLookaheads(gotoSet); + + // Add a transition if it already isn't there + GotoSetTransition nt = new GotoSetTransition(itemSet, oldGotoSet, symbol); + + if (!gotoSetTransitions.Any(a => a.From == nt.From && a.OnSymbol == nt.OnSymbol && a.To == nt.To)) + gotoSetTransitions.Add(nt); + } + } + } + } + } + while (added); + + LRParseTable parseTable = CreateParseTable(itemSets, gotoSetTransitions); + + // Create a new parser using that parse table and some additional information that needs + // to be available for the runtime parsing to work. + return new LRParser( + parseTable, + ((Terminal)_grammar.ErrorToken).TokenNumber, + _grammar.EndOfInputTerminal.TokenNumber, + _grammar.AllSymbols.OfType>().Select(f => f.DebugName).ToArray() + ); + } + + private ISet> CalculateNullable() + { + // TODO: This is a naive implementation that keeps iterating until the set becomes stable + // TODO: This could probably be optimized. 
+ + // A nullable symbol is a symbol that may consist of only epsilon transitions + HashSet> nullable = new HashSet>(); + bool nullableSetChanged; + + do + { + nullableSetChanged = false; + + foreach (NonTerminal nonTerminal in _grammar.AllSymbols.OfType>()) + { + // No need to reevaluate things we know to be nullable. + if (nullable.Contains(nonTerminal)) + continue; + + foreach (IProductionRule production in nonTerminal.ProductionRules) + { + // If this production is nullable, add the nonterminal to the set. + + // Iterate over symbols. If we find a terminal it is never nullable + // if we find a nonterminal continue iterating only if this terminal itself is not nullable. + // By this rule, empty production rules will always return nullable true + bool symbolIsNullable = production.Symbols.All(symbol => symbol is NonTerminal term && nullable.Contains(term)); + + if (symbolIsNullable) + nullableSetChanged |= nullable.Add(nonTerminal); + } + } + } + while (nullableSetChanged); + + return nullable; + } + + private LRParseTable CreateParseTable(List> itemSets, List gotoSetTransitions) + { + LRParseTable table = new LRParseTable(); + + // Create a temporary uncompressed action table. This is what we will use to create + // the compressed action table later on. This could probably be improved upon to save memory if needed. 
+ short[,] uncompressedActionTable = new short[itemSets.Count, _grammar.AllSymbols.OfType>().Count()]; + + for (int i = 0; i < itemSets.Count(); ++i) + for (int j = 0; j < _grammar.AllSymbols.OfType>().Count(); ++j) + uncompressedActionTable[i, j] = short.MinValue; + + int firstNonTerminalTokenNumber = _grammar.AllSymbols.OfType>().First().TokenNumber; + List gotos = new List(); + + for (int i = 0; i < itemSets.Count(); ++i) + { + Lr1ItemSet itemSet = itemSets[i]; + foreach (Lr1Item lr1Item in itemSet) + { + // Fill the action table first + + // If the next symbol in the LR0 item is a terminal (symbol + // found after the dot, add a SHIFT j IF GOTO(lr0Item, nextSymbol) == j + if (lr1Item.SymbolRightOfDot != null) + { + if (lr1Item.SymbolRightOfDot is Terminal) + { + // Look for a transition in the gotoSetTransitions there should always be one. + GotoSetTransition transition = gotoSetTransitions.First(t => t.From == itemSet && t.OnSymbol == lr1Item.SymbolRightOfDot); + int transitionIndex = itemSets.IndexOf(transition.To); + int tokenNumber = ((Terminal)lr1Item.SymbolRightOfDot).TokenNumber; + + SetActionTable(uncompressedActionTable, i, tokenNumber, LRParseTable.Shift(transitionIndex)); + } + } + else + { + // The dot is at the end. 
Add reduce action to the parse table for all lookaheads for the resulting symbol + // Do NOT do this if the resulting symbol is the start symbol + if (lr1Item.ProductionRule.ResultSymbol != _grammar.AcceptSymbol) + { + int numReductionRules = _reductionRules.Count(); + int reductionRule = 0; + + for (; reductionRule < numReductionRules; ++reductionRule) + if (_reductionRules[reductionRule].Item1 == lr1Item.ProductionRule) + break; // Found it, it's already created + + if (numReductionRules == reductionRule) + // Need to create a new reduction rule + _reductionRules.Add((lr1Item.ProductionRule, new ReductionRule( + (INonTerminal)lr1Item.ProductionRule.ResultSymbol, + lr1Item.ProductionRule.Symbols.Count(), + ((Symbol)lr1Item.ProductionRule.ResultSymbol).TokenNumber - firstNonTerminalTokenNumber, + lr1Item.ProductionRule.ReduceAction! + ))); + + foreach (Terminal lookahead in lr1Item.Lookaheads) + try + { + SetActionTable(uncompressedActionTable, i, lookahead.TokenNumber, LRParseTable.Reduce(reductionRule)); + } + catch (ReduceReduceConflictException e) + { + // Augment exception with correct symbols for the poor user + e.PreviousReduceSymbol = _reductionRules[-(1 + e.PreviousValue)].Item1.ResultSymbol; + e.NewReduceSymbol = _reductionRules[reductionRule].Item1.ResultSymbol; + e._message = $"Grammar contains a reduce-reduce conflict: previous reduce symbol '{e.PreviousReduceSymbol}', new reduce symbol: '{e.NewReduceSymbol}'.\nDid you forget to set an associativity/precedence?"; + + throw; + } + } + else + // This production rule has the start symbol with the dot at the rightmost end in it, add ACCEPT to action for end of input character. 
+ SetActionTable(uncompressedActionTable, i, _grammar.EndOfInputTerminal.TokenNumber, LRParseTable.Accept()); + } + } + + // Fill the goto table with the state IDs of all states that have been originally produced by the GOTO operation from this state + foreach (GotoSetTransition gotoTransition in gotoSetTransitions.Where(f => f.From == itemSet && f.OnSymbol is NonTerminal)) + gotos.Add(new GotoTable.GotoTableValue + { + NewState = itemSets.IndexOf(gotoTransition.To), + State = i, + Token = ((Symbol)gotoTransition.OnSymbol).TokenNumber - firstNonTerminalTokenNumber + }); + } + + // Move the reduction rules to the table. No need for the impromptu dictionary + // anymore. + table.ReductionRules = _reductionRules.Select(f => f.Item2).ToArray(); + table.Action = new CompressedTable(uncompressedActionTable); + table.Goto = new GotoTable(gotos); + table.StateCount = itemSets.Count; + + // Useful point to look at the table, and everything the builder has generated, since after this point the grammar is pretty much destroyed. + //string gotoGraph = gotoSetTransitions.AsDotNotation(itemSets); + //string debugTable = table.ToDebugString(grammar, itemSets.Count); + return table; + } + + private void SetActionTable(short[,] table, int state, int tokenNumber, short value) + { + // This is an error condition, find out what sort of exception it is + short oldValue = table[state, tokenNumber]; + + if (oldValue != value && oldValue != short.MinValue) + { + try + { + if (oldValue < 0 && value < 0) + // Both values are reduce. Throw a reduce reduce conflict. 
This is not solveable + throw new ReduceReduceConflictException("Grammar contains a reduce-reduce conflict.\nDid you forget to set an associativity/precedence?"); + + int shiftTokenNumber = tokenNumber; + int reduceRuleNumber; + short shiftValue; + short reduceValue; + + if (oldValue < 0) + { + // The old value was a reduce, the new must be a shift + shiftValue = value; + reduceValue = oldValue; + reduceRuleNumber = -(oldValue + 1); + } + else + { + // TODO: Unsure if this is a real case. The only testcases + // TODO: that end up here are retarded tests which are cyclic in nature. + // TODO: These cases always fail later on anyway due to conflicts. + // The old value was a shift + // the new value must be a reduce + shiftValue = oldValue; + reduceValue = value; + reduceRuleNumber = -(value + 1); + } + + // Check if these tokens have declared precedences and associativity + // If they do, we might be able to act on this. + Terminal shiftingTerminal = _grammar.AllSymbols.OfType>().First(f => f.TokenNumber == shiftTokenNumber); + IPrecedenceGroup shiftPrecedence = _grammar.GetPrecedence(shiftingTerminal); + IProductionRule productionRule = _reductionRules[reduceRuleNumber].Item1; + + // If the rule has a context dependent precedence, use that. Otherwise use + // the reduce precedence of the last terminal symbol in the production rules precedence + IPrecedenceGroup reducePrecedence = productionRule.ContextPrecedence ?? + _grammar.GetPrecedence(productionRule.Symbols.Reverse().OfType>().FirstOrDefault()); + + // If either rule has no precedence this is not a legal course of action. + // TODO: In bison this is apparently cool, it prefers to shift in this case. I don't know why, but this seems like a dangerous course of action to me. 
+ if (shiftPrecedence is null || reducePrecedence is null) + throw new ShiftReduceConflictException(shiftingTerminal, productionRule.ResultSymbol); + + if (shiftPrecedence.Precedence < reducePrecedence.Precedence) + table[state, tokenNumber] = reduceValue; // Precedence of reduce is higher, choose to reduce + else if (shiftPrecedence.Precedence > reducePrecedence.Precedence) + table[state, tokenNumber] = shiftValue; // Shift precedence is higher. Shift + + // Both tokens are in the same precedence group! It's now up to the associativity + // The two tokens CANNOT have different associativity, due to how the configuration works + // which throws up if you try to multiple-define the precedence + else if (shiftPrecedence.Associativity == AssociativityDirection.Left) + table[state, tokenNumber] = reduceValue; // Prefer reducing + else if (shiftPrecedence.Associativity == AssociativityDirection.Right) + table[state, tokenNumber] = shiftValue; // Prefer shifting + else // if (shiftPrecedence.Associativity == AssociativityDirection.NonAssociative) <- this is implied + throw new ShiftReduceConflictException(shiftingTerminal, productionRule.ResultSymbol); + } + catch (AmbiguousGrammarException ex) + { + // Fill in more information on the error and rethrow the error + ex.StateNumber = state; + ex.TokenNumber = tokenNumber; + ex.PreviousValue = oldValue; + ex.NewValue = value; + + throw; + } + } + else + table[state, tokenNumber] = value; + } + + private TerminalSet CalculateFirst(ISet> nullable) + { + TerminalSet first = new TerminalSet(_grammar); + + // Algorithm is that if a nonterminal has a production that starts with a + // terminal, we add that to the first set. If it starts with a nonterminal, we add + // that nonterminals firsts to the known firsts of our nonterminal. 
+ bool addedThings; + + do + { + addedThings = false; + + foreach (NonTerminal symbol in _grammar.AllSymbols.OfType>()) + foreach (IProductionRule productionRule in symbol.ProductionRules) + foreach (ISymbol? productionSymbol in productionRule.Symbols) + if (productionSymbol is Terminal terminal) + { + // Terminals are trivial, just add them + addedThings |= first.Add(symbol, terminal); + + // This production rule is done now + break; + } + else if (productionSymbol is NonTerminal nonTerminal) + { + // Add everything in FIRST for the given terminal. + foreach (Terminal f in first[nonTerminal]) + addedThings |= first.Add(symbol, f); + + // Stop iterating if it wasn't nullable + if (!nullable.Contains(nonTerminal)) + break; // Jump out since we've found a non nullable symbol + } + } + while (addedThings); + + return first; + } + + private Lr1ItemSet Goto(IEnumerable> closures, ISymbol symbol) => + // Every place there is a symbol to the right of the dot that matches the symbol we are looking for + // add a new Lr1 item that has the dot moved one step to the right. + new Lr1ItemSet(from lr1Item in closures + where lr1Item.SymbolRightOfDot != null + where lr1Item.SymbolRightOfDot == symbol + select new Lr1Item(lr1Item.ProductionRule, lr1Item.DotLocation + 1, lr1Item.Lookaheads)); + + private Lr1ItemSet Closure(IEnumerable> items, TerminalSet first, ISet> nullable) + { + // The items themselves are always in their own closure set + Lr1ItemSet closure = new Lr1ItemSet(); + + foreach (Lr1Item lr1Item in items) + closure.Add(lr1Item); + + // This needs to be a normal for loop since we add to the underlying collection + // as we go along. 
This avoids investigating the same rule twice + for (int currentItem = 0; currentItem < closure.Count(); ++currentItem) + { + Lr1Item item = closure[currentItem]; + ISymbol symbolRightOfDot = item.SymbolRightOfDot; + + if (symbolRightOfDot != null) + { + // Generate the lookahead items + HashSet> lookaheads = new HashSet>(); + bool nonNullableFound = false; + + for (int i = item.DotLocation + 1; i < item.ProductionRule.Symbols.Length; ++i) + { + ISymbol? symbol = item.ProductionRule.Symbols[i]; + + // If symbol is terminal, just add it + if (symbol is Terminal term) + { + lookaheads.Add(term); + + // Terminals are not nullable, break out of loop + nonNullableFound = true; + + break; + } + + foreach (Terminal terminal in first[(NonTerminal?)symbol]) + lookaheads.Add(terminal); + + if (!nullable.Contains(symbol)) + { + nonNullableFound = true; + + break; + } + } + + if (!nonNullableFound) + // Add each of the lookahead symbols of the generating rule to the new lookahead set + foreach (Terminal lookahead in item.Lookaheads) + lookaheads.Add(lookahead); + + // Create new Lr1 items from all rules where the resulting symbol of the production rule matches the symbol that was to the right of the dot. 
+ IEnumerable> newLr1Items = _grammar.ProductionRules.Where(f => f.ResultSymbol == symbolRightOfDot).Select(f => new Lr1Item(f, 0, lookaheads)); + + foreach (Lr1Item lr1Item in newLr1Items) + closure.Add(lr1Item); + } + } + + return closure; + } + + + internal sealed class GotoSetTransition + { + public Lr1ItemSet From { get; } + public Lr1ItemSet To { get; } + public ISymbol OnSymbol { get; } + + + public GotoSetTransition(Lr1ItemSet from, Lr1ItemSet to, ISymbol onSymbol) + { + From = from; + To = to; + OnSymbol = onSymbol; + } + } + } +} diff --git a/Piglet/Parser/Construction/ReduceReduceConflictException.cs b/Piglet/Parser/Construction/ReduceReduceConflictException.cs index 7779046..de82dbe 100644 --- a/Piglet/Parser/Construction/ReduceReduceConflictException.cs +++ b/Piglet/Parser/Construction/ReduceReduceConflictException.cs @@ -7,16 +7,10 @@ namespace Piglet.Parser.Construction /// at the same points. This is usually indicative of a serious grammar error. /// /// Semantic value of symbols used in the grammar - public class ReduceReduceConflictException : AmbiguousGrammarException + public sealed class ReduceReduceConflictException + : AmbiguousGrammarException { - /// - /// Create a new reduce reduce conflict exception - /// - /// Exception message - public ReduceReduceConflictException(string message) - : base (message) - { - } + internal string? _message; /// /// The reduce symbol that existed in the parse table before the new reduce symbol was applied. @@ -27,5 +21,17 @@ public ReduceReduceConflictException(string message) /// The reduce symbol that the parser generator tried to apply. /// public ISymbol NewReduceSymbol { get; internal set; } + + public override string Message => _message ?? 
base.Message; + + + /// + /// Create a new reduce reduce conflict exception + /// + /// Exception message + public ReduceReduceConflictException(string message) + : base(message) + { + } } } \ No newline at end of file diff --git a/Piglet/Parser/Construction/ReductionRule.cs b/Piglet/Parser/Construction/ReductionRule.cs index 2581a92..40cb5b5 100644 --- a/Piglet/Parser/Construction/ReductionRule.cs +++ b/Piglet/Parser/Construction/ReductionRule.cs @@ -1,11 +1,25 @@ using System; +using Piglet.Parser.Configuration; +using Piglet.Lexer.Runtime; + namespace Piglet.Parser.Construction { - internal class ReductionRule : IReductionRule + internal sealed class ReductionRule + : IReductionRule { - public int NumTokensToPop { get; set; } - public int TokenToPush { get; set; } - public Func OnReduce { get; set; } + public int NumTokensToPop { get; } + public int TokenToPush { get; } + public INonTerminal ReductionSymbol { get; } + public Func[], T> OnReduce { get; } + + + public ReductionRule(INonTerminal sym, int pop, int push, Func[], T> func) + { + ReductionSymbol = sym; + NumTokensToPop = pop; + TokenToPush = push; + OnReduce = func; + } } } \ No newline at end of file diff --git a/Piglet/Parser/Construction/ShiftReduceConflictException.cs b/Piglet/Parser/Construction/ShiftReduceConflictException.cs index 21930ae..6173dea 100644 --- a/Piglet/Parser/Construction/ShiftReduceConflictException.cs +++ b/Piglet/Parser/Construction/ShiftReduceConflictException.cs @@ -1,32 +1,34 @@ -using Piglet.Parser.Configuration; - -namespace Piglet.Parser.Construction -{ - /// - /// A shift reduce conflict exception is thrown by the parser generator when the grammar is - /// ambiguous in such a way that the parser cannot decide if to shift another token or to reduce - /// by a given rule. 
- /// - /// - public class ShiftReduceConflictException : AmbiguousGrammarException - { - /// - /// Construct a new shift reduce exception - /// - /// Exception message - public ShiftReduceConflictException(string message) - : base(message) - { - } - - /// - /// The shift symbol in the conflict - /// - public ISymbol ShiftSymbol { get; internal set; } - - /// - /// The reduce symbol in the conflict - /// - public ISymbol ReduceSymbol { get; internal set; } - } +using Piglet.Parser.Configuration; + +namespace Piglet.Parser.Construction +{ + /// + /// A shift reduce conflict exception is thrown by the parser generator when the grammar is + /// ambiguous in such a way that the parser cannot decide if to shift another token or to reduce + /// by a given rule. + /// + public sealed class ShiftReduceConflictException + : AmbiguousGrammarException + { + /// + /// The shift symbol in the conflict + /// + public ISymbol ShiftSymbol { get; } + + /// + /// The reduce symbol in the conflict + /// + public ISymbol ReduceSymbol { get; } + + + /// + /// Construct a new shift reduce exception + /// + public ShiftReduceConflictException(ISymbol shift, ISymbol reduce) + : base($"The grammar contains a shift-reduce conflict.\nShift symbol: {shift}\nReduce symbol: {reduce}\nDid you forget to set an associativity/precedence?") + { + ShiftSymbol = shift; + ReduceSymbol = reduce; + } + } } \ No newline at end of file diff --git a/Piglet/Parser/Construction/TerminalSet.cs b/Piglet/Parser/Construction/TerminalSet.cs index 8b705bd..3bc1d33 100644 --- a/Piglet/Parser/Construction/TerminalSet.cs +++ b/Piglet/Parser/Construction/TerminalSet.cs @@ -14,7 +14,7 @@ public TerminalSet(IGrammar grammar) // Iterate through all the symbols we've got in the grammar // and add stuff to the first set - foreach (var symbol in grammar.AllSymbols.OfType>()) + foreach (NonTerminal symbol in grammar.AllSymbols.OfType>()) { // Initialize the list dict[symbol] = new List>(); @@ -23,7 +23,7 @@ public 
TerminalSet(IGrammar grammar) public bool Add(NonTerminal symbol, Terminal terminal) { - var terminals = dict[symbol]; + List> terminals = dict[symbol]; if (terminals.Contains(terminal)) { return false; diff --git a/Piglet/Parser/IParser.cs b/Piglet/Parser/IParser.cs index fcf8352..93c86b5 100644 --- a/Piglet/Parser/IParser.cs +++ b/Piglet/Parser/IParser.cs @@ -1,6 +1,6 @@ -using System.IO; -using Piglet.Lexer; using Piglet.Parser.Construction; +using Piglet.Lexer.Runtime; +using Piglet.Lexer; namespace Piglet.Parser { @@ -13,7 +13,7 @@ public interface IParser /// /// Gets or sets the lexer associated with the parser. /// - ILexer Lexer { get; set; } + ILexer? Lexer { get; set; } /// /// Get the internal parse table for this parser. @@ -28,12 +28,6 @@ public interface IParser /// The resulting semantic value symbol T Parse(string input); - /// - /// Parse an input string, returning the resulting semantic value type that is left on the parse - /// stack. - /// - /// Input stream to parse - /// The resulting semantic value symbol - T Parse(TextReader input); + LexedToken ParseTokens(string input); } } \ No newline at end of file diff --git a/Piglet/Parser/LRParser.cs b/Piglet/Parser/LRParser.cs index daebef7..e7ad24d 100644 --- a/Piglet/Parser/LRParser.cs +++ b/Piglet/Parser/LRParser.cs @@ -1,166 +1,153 @@ -using System.Collections.Generic; -using System.IO; -using System.Linq; -using Piglet.Lexer; -using Piglet.Parser.Construction; - -namespace Piglet.Parser -{ - internal class LRParser : IParser - { - private readonly IParseTable parseTable; - - private readonly int errorTokenNumber; - private readonly int endOfInputTokenNumber; - private readonly string[] terminalDebugNames; - - internal LRParser(IParseTable parseTable, int errorTokenNumber, int endOfInputTokenNumber, string[] terminalDebugNames) - { - this.parseTable = parseTable; - - this.errorTokenNumber = errorTokenNumber; - this.endOfInputTokenNumber = endOfInputTokenNumber; - this.terminalDebugNames = 
terminalDebugNames; - } - - public IParseTable ParseTable { get { return parseTable; } } - - public ILexer Lexer { get; set; } - - private T Parse(ILexerInstance lexerInstance) - { - var valueStack = new Stack(); - var parseStack = new Stack(); - - // Push default state onto the parse stack. Default state is always 0 - parseStack.Push(0); - - var input = lexerInstance.Next(); - - // This holds the last exception we found when parsing, since we - // will need to pass this to an error handler once the proper handler has been found - ParseException exception = null; - - while (true) - { - int state = parseStack.Peek(); - int action = parseTable.Action[state, input.Item1]; - if (action >= 0) - { - if (action == short.MaxValue) - { - // Accept! - return valueStack.Pop(); - } - - // Shift - parseStack.Push(input.Item1); // Push token unto stack - parseStack.Push(action); // Push state unto stack - - // Shift token value unto value stack - valueStack.Push(input.Item2); - - // Lex next token - input = lexerInstance.Next(); - } - else - { - if (action == short.MinValue) - { - // Get the expected tokens - string[] expectedTokens = GetExpectedTokenNames(state).ToArray(); - - // Create an exception that either might be thrown or may be handed to the error handling routine. - exception = new ParseException(string.Format("Illegal token {0}. Expected {1}", - terminalDebugNames[input.Item1], string.Join(",", expectedTokens))) - { - LexerState = lexerInstance, - FoundToken = terminalDebugNames[input.Item1], - ExpectedTokens = expectedTokens, - FoundTokenId = input.Item1, - ParserState = state - }; - - // Go for error recovery! - while (parseTable.Action[parseStack.Peek(), errorTokenNumber] == short.MinValue) - { - // If we run out of stack while searching for the error handler, throw the exception - // This is what happens when there is no error handler defined at all. 
- if (parseStack.Count <= 2) - throw exception; - - parseStack.Pop(); // Pop state - parseStack.Pop(); // Pop token - valueStack.Pop(); // Pop whatever value - } - - // Shift the error token unto the stack - state = parseStack.Peek(); - parseStack.Push(errorTokenNumber); - parseStack.Push(parseTable.Action[state, errorTokenNumber]); - valueStack.Push(default(T)); - state = parseStack.Peek(); - - // We have now found a state where error recovery is enabled. This means that we - // continue to scan the input stream looking for something which is accepted. - // End of input will cause the exception to be thrown - for (; parseTable.Action[state, input.Item1] == short.MinValue && - input.Item1 != endOfInputTokenNumber; input = lexerInstance.Next()) - ; // nom nom nom - - // Ran out of file looking for the end of the error rule - if (input.Item1 == endOfInputTokenNumber) - throw exception; - - // If we get here we are pretty cool, continue running the parser. The actual error recovery routine will be - // called as soon as the error rule itself is reduced. - } - else - { - // Get the right reduction rule to apply - var reductionRule = parseTable.ReductionRules[-(action + 1)]; - for (int i = 0; i < reductionRule.NumTokensToPop*2; ++i) - { - parseStack.Pop(); - } - - // Transfer to state found in goto table - int stateOnTopOfStack = parseStack.Peek(); - parseStack.Push(reductionRule.TokenToPush); - parseStack.Push(parseTable.Goto[stateOnTopOfStack, reductionRule.TokenToPush]); - - // Get tokens off the value stack for the OnReduce function to run on - var onReduceParams = new T[reductionRule.NumTokensToPop]; - - // Need to do it in reverse since thats how the stack is organized - for (int i = reductionRule.NumTokensToPop - 1; i >= 0; --i) - { - onReduceParams[i] = valueStack.Pop(); - } - - // This calls the reduction function with the possible exception set. 
The exception could be cleared here, but - // there is no real reason to do so, since all the normal rules will ignore it, and all the error rules are guaranteed - // to have the exception set prior to entering the reduction function. - var reduceFunc = reductionRule.OnReduce; - valueStack.Push(reduceFunc == null ? default(T) : reduceFunc(exception, onReduceParams)); - } - } - } - } - - private IEnumerable GetExpectedTokenNames(int state) - { - return terminalDebugNames.Where((t, i) => parseTable.Action[state, i] != short.MinValue); - } - - public T Parse(string input) - { - return Parse(Lexer.Begin(input)); - } - - public T Parse(TextReader input) - { - return Parse(Lexer.Begin(input)); - } - } -} +using System.Collections.Generic; +using System.Linq; +using System; + +using Piglet.Parser.Construction; +using Piglet.Lexer.Runtime; +using Piglet.Lexer; + +namespace Piglet.Parser +{ + internal sealed class LRParser + : IParser + { + private readonly int _errorTokenNumber; + private readonly int _endOfInputTokenNumber; + private readonly string?[] _terminalDebugNames; + + + public IParseTable ParseTable { get; } + public ILexer? Lexer { get; set; } + + + internal LRParser(IParseTable parseTable, int errorTokenNumber, int endOfInputTokenNumber, string?[] terminalDebugNames) + { + ParseTable = parseTable; + _errorTokenNumber = errorTokenNumber; + _endOfInputTokenNumber = endOfInputTokenNumber; + _terminalDebugNames = terminalDebugNames; + } + + private LexedToken Parse(ILexerInstance? lexerInstance) + { + if (lexerInstance is null) + throw new ArgumentNullException(nameof(lexerInstance)); + + Stack> valueStack = new Stack>(); + Stack parseStack = new Stack(); + + // Push default state onto the parse stack. 
Default state is always 0 + parseStack.Push(0); + + (int number, LexedToken token) input = lexerInstance.Next(); + + // This holds the last exception we found when parsing, since we + // will need to pass this to an error handler once the proper handler has been found + ParseException? exception = null; + + while (true) + { + int state = parseStack.Peek(); + int action = ParseTable.Action?[state, input.number] ?? short.MinValue; + + if (action >= 0) + { + if (action == short.MaxValue) + return valueStack.Pop(); // Accept! + + // Shift + parseStack.Push(input.number); // Push token unto stack + parseStack.Push(action); // Push state unto stack + + // Shift token value unto value stack + valueStack.Push(input.token); + + // Lex next token + input = lexerInstance.Next(); + } + else if (action == short.MinValue) + { + // Get the expected tokens + string[] expectedTokens = GetExpectedTokenNames(state).ToArray(); + + // Create an exception that either might be thrown or may be handed to the error handling routine. + exception = new ParseException($"Illegal token '{_terminalDebugNames[input.number]}', expected {{'{string.Join("', '", expectedTokens)}'}} at ({lexerInstance.CurrentLineNumber}:{lexerInstance.CurrentCharacterIndex}).") + { + LexerState = lexerInstance, + FoundToken = _terminalDebugNames[input.number], + ExpectedTokens = expectedTokens, + FoundTokenId = input.number, + ParserState = state + }; + + // Go for error recovery! + while ((ParseTable.Action?[parseStack.Peek(), _errorTokenNumber] ?? short.MinValue) == short.MinValue) + { + // If we run out of stack while searching for the error handler, throw the exception + // This is what happens when there is no error handler defined at all. 
+ if (parseStack.Count <= 2) + throw exception; + + parseStack.Pop(); // Pop state + parseStack.Pop(); // Pop token + valueStack.Pop(); // Pop whatever value + } + + // Shift the error token unto the stack + state = parseStack.Peek(); + + parseStack.Push(_errorTokenNumber); + parseStack.Push(ParseTable.Action?[state, _errorTokenNumber] ?? short.MinValue); + valueStack.Push(new LexedToken(default!, lexerInstance.CurrentAbsoluteIndex, lexerInstance.CurrentLineNumber, lexerInstance.CurrentCharacterIndex, 0)); + + state = parseStack.Peek(); + + // We have now found a state where error recovery is enabled. This means that we + // continue to scan the input stream looking for something which is accepted. + // End of input will cause the exception to be thrown + for (; ParseTable.Action[state, input.number] == short.MinValue && input.number != _endOfInputTokenNumber; input = lexerInstance.Next()) + ; // nom nom nom + + // Ran out of file looking for the end of the error rule + if (input.number == _endOfInputTokenNumber) + throw exception; + + // If we get here we are pretty cool, continue running the parser. The actual error recovery routine will be + // called as soon as the error rule itself is reduced. + } + else if (ParseTable.ReductionRules?[-(action + 1)] is IReductionRule rule) // Get the right reduction rule to apply + { + for (int i = 0; i < rule.NumTokensToPop * 2; ++i) + parseStack.Pop(); + + // Transfer to state found in goto table + int stateOnTopOfStack = parseStack.Peek(); + + parseStack.Push(rule.TokenToPush); + parseStack.Push(ParseTable.Goto?[stateOnTopOfStack, rule.TokenToPush] ?? 
short.MinValue); + + // Get tokens off the value stack for the OnReduce function to run on + LexedToken[] tokens = new LexedToken[rule.NumTokensToPop]; + + // Need to do it in reverse since thats how the stack is organized + for (int i = rule.NumTokensToPop - 1; i >= 0; --i) + tokens[i] = valueStack.Pop(); + + // This calls the reduction function with the possible exception set. The exception could be cleared here, but + // there is no real reason to do so, since all the normal rules will ignore it, and all the error rules are guaranteed + // to have the exception set prior to entering the reduction function. + Func[], T> reduceFunc = rule.OnReduce!; + T result = reduceFunc == null ? default : reduceFunc(exception, tokens); + + valueStack.Push(new LexedNonTerminal(result!, rule.ReductionSymbol, tokens)); + } + } + } + + private IEnumerable GetExpectedTokenNames(int state) => _terminalDebugNames.Where((t, i) => t is { } && ParseTable.Action?[state, i] != short.MinValue).Cast(); + + public LexedToken ParseTokens(string input) => Parse(Lexer?.Begin(input)); + + public T Parse(string input) => ParseTokens(input).SymbolValue; + } +} diff --git a/Piglet/Parser/ParseException.cs b/Piglet/Parser/ParseException.cs index 56f6ed9..b344bf1 100644 --- a/Piglet/Parser/ParseException.cs +++ b/Piglet/Parser/ParseException.cs @@ -7,7 +7,8 @@ namespace Piglet.Parser /// ParseExceptions are thrown when the parser detects an illegal token according to the given /// grammar. /// - public class ParseException : Exception + public sealed class ParseException + : Exception { /// /// Current state of the lexer. @@ -23,7 +24,7 @@ public class ParseException : Exception /// /// The debug name of the token that was found instead. /// - public string FoundToken { get; set; } + public string? FoundToken { get; set; } /// /// The state number of the parser when it failed @@ -34,12 +35,33 @@ public class ParseException : Exception /// The token ID of the token that was found. 
/// public int FoundTokenId { get; set; } - + + /// + /// The current line number in the input text + /// + public int CurrentLineNumber => LexerState.CurrentLineNumber; + + /// + /// The contents so far of the current line + /// + public string CurrentLine => LexerState.CurrentLine; + + /// + /// The current character index inside the current line (zero-based). + /// + public int CurrentCharacterIndex => LexerState.CurrentCharacterIndex; + + /// + /// The current character index in the input text (zero-based). + /// + public int CurrentAbsoluteIndex => LexerState.CurrentAbsoluteIndex; + + /// /// Construct a new ParseException /// /// - public ParseException(string message) + internal ParseException(string message) : base(message) { } diff --git a/Piglet/Parser/ParserFactory.cs b/Piglet/Parser/ParserFactory.cs index c2bebd4..b001665 100644 --- a/Piglet/Parser/ParserFactory.cs +++ b/Piglet/Parser/ParserFactory.cs @@ -1,7 +1,7 @@ -using System; using Piglet.Parser.Configuration; using Piglet.Parser.Configuration.Fluent; + namespace Piglet.Parser { /// @@ -14,19 +14,12 @@ public static class ParserFactory /// /// Semantic value type of tokens /// A configurator, ready for use - public static IParserConfigurator Configure() - { - var parserConfigurator = new ParserConfigurator(); - return parserConfigurator; - } + public static IParserConfigurator Configure() => new ParserConfigurator(); /// /// Create a fluent configurator object. 
/// /// A fluent configurator - public static IFluentParserConfigurator Fluent() - { - return new FluentParserConfigurator(new ParserConfigurator()); - } + public static IFluentParserConfigurator Fluent() => new FluentParserConfigurator(new ParserConfigurator()); } } \ No newline at end of file diff --git a/Piglet/Piglet.csproj b/Piglet/Piglet.csproj index 7157b0e..4601ed7 100644 --- a/Piglet/Piglet.csproj +++ b/Piglet/Piglet.csproj @@ -1,36 +1,40 @@ - - - - netstandard2.0;netstandard1.6;net40;net45 - true - 1.5.0 - Per Dervall - Piglet - Per Dervall;harrison314 - Parser and lexer generator that does not require a pre-build step and configurable using fluent configuration. - - https://github.com/harrison314/Piglet - git - https://github.com/Dervall/Piglet - Piglet - Piglet.xml - - - - MIT - - - - full - true - - - - none - false - - - - - - + + + netcoreapp3.1;netcoreapp3.0;netstandard2.1 + 8.0 + true + 1.6.0 + Per Dervall; Unknown6656 + Piglet + Per Dervall; Unknown6656; harrison314 + Parser and lexer generator that does not require a pre-build step and configurable using fluent configuration. + https://github.com/unknown6656/Piglet + git + MIT + https://github.com/unknown6656/Piglet + Piglet + Piglet.xml + 4 + ..\bin + true + false + enable + false + + + full + false + true + + + pdbonly + true + false + + + + + + + + diff --git a/Piglet/Piglet.xml b/Piglet/Piglet.xml index 2bd6d7f..ba608ee 100644 --- a/Piglet/Piglet.xml +++ b/Piglet/Piglet.xml @@ -66,6 +66,11 @@ The default is true, and it should normally be kept that way + + + Specifies whether the lexer is case-insensitive (), or case-sensitive (). + + Gets and sets the lexer runtime, which is the method that the resulting lexer will be constructed with. @@ -102,16 +107,17 @@ This is a debug class for obtaining dot notation graphviz graphs for lexer components. 
- + Get the DFA and NFA graphs for a given regular expression Regular expression Minimize the resulting DFA + Determines whether the regular expression is case-insensitive Dot notation NFA graph Dot notation DFA graph - + Get the DFA and NFA graphs for a given regular expression and highlight active states for a given input string @@ -119,6 +125,7 @@ Regular expression Input string Minimize the resulting DFA + Determines whether the regular expression is case-insensitive Dot notation NFA graph Dot notation DFA graph @@ -151,6 +158,13 @@ Message to show + + + Construct a new LexerConstructionException + + Message to show + Inner exception + A lexer that tokenizes input into tuples of tokennumber and semantic value. Lexers are not thread safe, but they are reentrant. You @@ -199,7 +213,17 @@ - The current line number in the input text + The current line number in the input text (one-based). + + + + + The current character index in the input text (zero-based). + + + + + The current character index inside the current line (zero-based). @@ -224,14 +248,34 @@ - The contents of the current line so far of the current document + The contents of the current line so far of the current document. + + + + + The current character index inside the current line (zero-based). + + + + + The current character index in the input text (zero-based). - + + + The lexed input string. + + + Construct a new LexerException Message to display + The current line number of the document the lexer is scanning. + The contents of the current line so far of the current document. + The current character index inside the current line (zero-based). + The current character index in the input text (zero-based). + The lexed input string. @@ -255,6 +299,80 @@ Additional lexing settings A lexer compatibe with the given grammars tokenizing rules + + + Represents an abstract lexed (accepted) token. + + + + + Returns the string associated with the lexed symbol. 
+ + + + + The token's absolute index in the input string (zero-based). + + + + + The token's starting line number (one-based). + + + + + The token's starting index inside the starting line (zero-based). + + + + + The token's length (in characters). + + + + + Determines whether the token is a terminal token. + + + + + The debug name of the current token. + + + + + Represents a lexed (accepted) token. + + The semantic value stored inside the lexed symbol. + + + + Returns the lexed symbol. + + + + + + + + Represents a lexed non-terminal token. + + The semantic value stored inside the lexed symbol. + + + + The + + + + + + + + + + + A configuration object for creating fluently configured parsers. @@ -456,6 +574,1471 @@ Specify that the preceeding element may be missing + + + Represents an abstract generic parser constructor. + + The parser based on this constructor will return a parsed value of the type . + + The generic value return type of the parser. + + + + The parser configuration. + + + + + Creates a new generic parser constructor with the default parser configuration. + + + + + Creates a new generic parser constructor with the given parser configuration. + + Parser configuration. + + + + Creates a new non-terminal symbol with the given generic semantic value and name. + + Generic semantic value stored inside the new non-terminal symbol. + The name of the new non-terminal symbol. + The newly created non-terminal symbol. + + + + Creates a new non-terminal symbol with the given generic semantic value and the default name for non-terminals ("NT..."). + + Generic semantic value stored inside the new non-terminal symbol. + The newly created non-terminal symbol. + + + + Creates a new terminal symbol associated with the given regex string and generic value. + + Generic semantic value stored inside the new terminal symbol. + The regex string associated with the terminal symbol. + The value stored inside the new terminal value. + The newly created terminal symbol. 
+ + + + Creates a new terminal symbol associated with the given regex string and the function providing the generic value. + + Generic semantic value stored inside the new terminal symbol. + The regex string associated with the terminal symbol. + The function providing the generic value represented by the terminal symbol. + The newly created terminal symbol. + + + + Creates a new terminal symbol associated with the given regex string and the identity function (of the type ). + + The regex string associated with the terminal symbol. + The newly created terminal symbol. + + + + Sets the precedence for all given symbols in ascending order. The first symbol group is therefore considered to have the lowest precedence and the last symbol group the highest precedence. + + Ordered collection of groups containing a set of symbols with their corresponding associativity. + + + + Sets the given associativity to all symbols in the given symbol collection. All sybols will be considered to have the same precedence group. + + Associativity direction. + Target symbols. + The precedence group associated with the given symbols. + + + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the given symbol. +
+ The generic type of . + The generic type of . + The non-terminal symbol which gets reduced. + The symbol, to which gets reduced. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 0 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 1 given symbol. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 2 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 3 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 4 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 5 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 6 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 7 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 8 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 9 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 10 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 11 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 12 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 13 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The symbol no. 12, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 14 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The symbol no. 12, to which the current non-terminal symbol gets reduced. + The symbol no. 13, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 15 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The symbol no. 12, to which the current non-terminal symbol gets reduced. + The symbol no. 13, to which the current non-terminal symbol gets reduced. + The symbol no. 14, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new production rule on the given non-terminal symbol using the given production function. +
+ This production represents the reducing of the given non-terminal to the 16 given symbols. +
+ The non-terminal symbol which gets reduced. + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The symbol no. 12, to which the current non-terminal symbol gets reduced. + The symbol no. 13, to which the current non-terminal symbol gets reduced. + The symbol no. 14, to which the current non-terminal symbol gets reduced. + The symbol no. 15, to which the current non-terminal symbol gets reduced. + The generic production function which gets called upon reduction. The function accepts the value stored inside the produced symbols, and returns the value to be stored inside the non-terminal symbol "". + The newly created production rule. +
+ + + Creates a new parser based on the current configurator and returns it. + + Note: Only the first call of creates a new parser. If you wish to reset the generated parser and re-create it, call the method beforehand. + + The constructed parser. + + + + Resets the constructed parser to , thereby forcing the parser to be re-constructed based on the current configuration the next time is called. + + + + + Constructs the parser. This method must be implemented by every constructor based on + + The non-terminal production start symbol. The value of this symbol will be returned when the constructed parser is executed. + + + + Represents a wrapper for the generic parser. + + + + + The internal parser instance. + + + + + Tries to parse the given string and returns whether the parsing was successful. + + The input string. + The parsed value. + + + + Parses the given string and returns the parsed value of the type . + + The input string. + The parsed value. + + + + An interface for generic terminal wrappers. + + + + + The underlying terminal symbol. + + + + + Represents a generic symbol wrapper. + + The generic type stored inside the symbol. + + + + The underlying (boxed) symbol. + + + + + The type of the generic value stored inside the symbol. + + + + + Creates a new generic symbol wrapper for the given (boxed) symbol. + + Boxed symbol. + + + + + + + Represents a generic terminal symbol wrapper. + + + The generic type stored inside the symbol. + + + + + + + Creates a new generic symbol wrapper for the given (boxed) terminal symbol. + + Boxed terminal symbol. + + + + Represents a generic non-terminal symbol wrapper. + + + The generic type stored inside the symbol. + + + + Creates a new generic symbol wrapper for the given (boxed) non-terminal symbol. + + Boxed non-terminal symbol. + + + + Creates a new (empty) production rule on the current non-terminal symbol and returns it. + + The newly created production rule. 
+ + + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 1 given symbol. +
+ The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 2 given symbols. +
+ The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 3 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 4 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 5 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 6 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 7 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 8 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 9 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 10 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 11 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 12 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 13 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The symbol no. 12, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 14 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The symbol no. 12, to which the current non-terminal symbol gets reduced. + The symbol no. 13, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 15 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The symbol no. 12, to which the current non-terminal symbol gets reduced. + The symbol no. 13, to which the current non-terminal symbol gets reduced. + The symbol no. 14, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Creates a new production rule on the current non-terminal symbol. +
+ This production represents the reducing of the current non-terminal to the 16 given symbols. +
+ The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The generic type of . + The symbol no. 0, to which the current non-terminal symbol gets reduced. + The symbol no. 1, to which the current non-terminal symbol gets reduced. + The symbol no. 2, to which the current non-terminal symbol gets reduced. + The symbol no. 3, to which the current non-terminal symbol gets reduced. + The symbol no. 4, to which the current non-terminal symbol gets reduced. + The symbol no. 5, to which the current non-terminal symbol gets reduced. + The symbol no. 6, to which the current non-terminal symbol gets reduced. + The symbol no. 7, to which the current non-terminal symbol gets reduced. + The symbol no. 8, to which the current non-terminal symbol gets reduced. + The symbol no. 9, to which the current non-terminal symbol gets reduced. + The symbol no. 10, to which the current non-terminal symbol gets reduced. + The symbol no. 11, to which the current non-terminal symbol gets reduced. + The symbol no. 12, to which the current non-terminal symbol gets reduced. + The symbol no. 13, to which the current non-terminal symbol gets reduced. + The symbol no. 14, to which the current non-terminal symbol gets reduced. + The symbol no. 15, to which the current non-terminal symbol gets reduced. + The newly created production rule. +
+ + + Represents an abstract generic production wrapper. + + The generic return type of the production. This is the type stored inside the non-terminal which gets reduced by the production represented by this wrapper. + + + + The underlying (boxed) production of this wrapper. + + + + + Creates a new abstract generic production wrapper based on the given (boxed) production. + + Boxed production instance. + + + + Configures the production to reduce the non-terminal to the first symbol. This is equivalent to with the index 0. + + The current instance. + + + + Sets the given precedence group on the current production. + + Precedence group to be assigned to the current production. + The current instance. + + + + Represents a generic reduce function of the type "(R) -> ". + + + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, R) -> ". + + + The generic input type of the symbol no. 0. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. 
+ + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic return type of the production. 
+ + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic input type of the symbol no. 7. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic input type of the symbol no. 7. + The generic input type of the symbol no. 8. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic input type of the symbol no. 7. + The generic input type of the symbol no. 8. 
+ The generic input type of the symbol no. 9. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic input type of the symbol no. 7. + The generic input type of the symbol no. 8. + The generic input type of the symbol no. 9. + The generic input type of the symbol no. 10. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic input type of the symbol no. 7. + The generic input type of the symbol no. 8. + The generic input type of the symbol no. 9. + The generic input type of the symbol no. 10. + The generic input type of the symbol no. 11. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, R) -> ". 
+ + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic input type of the symbol no. 7. + The generic input type of the symbol no. 8. + The generic input type of the symbol no. 9. + The generic input type of the symbol no. 10. + The generic input type of the symbol no. 11. + The generic input type of the symbol no. 12. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic input type of the symbol no. 7. + The generic input type of the symbol no. 8. + The generic input type of the symbol no. 9. + The generic input type of the symbol no. 10. + The generic input type of the symbol no. 11. + The generic input type of the symbol no. 12. + The generic input type of the symbol no. 13. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. 
+ The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic input type of the symbol no. 7. + The generic input type of the symbol no. 8. + The generic input type of the symbol no. 9. + The generic input type of the symbol no. 10. + The generic input type of the symbol no. 11. + The generic input type of the symbol no. 12. + The generic input type of the symbol no. 13. + The generic input type of the symbol no. 14. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + + + + Represents a generic reduce function of the type "(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, R) -> ". + + + The generic input type of the symbol no. 0. + The generic input type of the symbol no. 1. + The generic input type of the symbol no. 2. + The generic input type of the symbol no. 3. + The generic input type of the symbol no. 4. + The generic input type of the symbol no. 5. + The generic input type of the symbol no. 6. + The generic input type of the symbol no. 7. + The generic input type of the symbol no. 8. + The generic input type of the symbol no. 9. + The generic input type of the symbol no. 10. + The generic input type of the symbol no. 11. + The generic input type of the symbol no. 12. + The generic input type of the symbol no. 13. + The generic input type of the symbol no. 14. + The generic input type of the symbol no. 15. + The generic return type of the production. + + + + Creates a new generic production wrapper based on the given (boxed) production. + + + Boxed production instance. + Set additional settings for the lexer @@ -476,6 +2059,11 @@ Set the list of regular expressions to ignore. The default is to ignore all kinds of whitespace. 
+ + + Specifies whether the lexer is case-insensitive (), or case-sensitive (). + + Gets and sets the runtime of the constructed lexer. See the enumeration LexerRuntime for an @@ -490,7 +2078,7 @@ - Creates a production on a given nonterminal. The parts parameter may contains either + Creates a production on a given non-terminal. The parts parameter may contain either previously declared symbols of the grammar or strings, which are interpreted as terminals which may be given unescaped as per the lexer settings of the main configurator object. If an empty rule is desired you may pass no parameters to the Production. Null must not be passed. @@ -515,14 +2103,14 @@ Regular expression to match Action to take on parsing. If null is passed the default action is f => default(T) If true, this terminal takes precedence over previously created terminals - A terminal symbol + The newly created terminal symbol. - Create a new NonTerminal. Production actions may be specified directly, or deferred until later. The + Creates a new non-terminal. Production actions may be specified directly, or deferred until later. The latter is more typical since rules are often recursive in their nature. - + The newly created non-terminal symbol. @@ -558,18 +2146,66 @@ Symbols to set associativity on + + + Sets the given non-terminal symbol as start symbol + + Non-terminal symbol + Creates a parser based on the inputted configuration. If a lexer has been desired as well, this method will also create the lexer. The created parser + + + A static class containing generic extension methods for . + + + + + Semantic value of tokens. + The parser configurator. + Regular expression to match. + The value stored inside the terminal symbol. + If true, this terminal takes precedence over previously created terminals. + The newly created terminal symbol. + + + + Semantic value of tokens. + The parser configurator. + The non-terminal symbol's name. + The newly created non-terminal symbol. 
+ + + + Creates a new reduce production on the given non-terminal symbol. + + The parts parameter may contain either + previously declared symbols of the grammar or strings, which are interpreted as terminals + which may be given unescaped as per the lexer settings of the main configurator object. + If an empty rule is desired you may pass no parameters to the Production. + + Semantic value of tokens. + The non-terminal symbol. + Parts of rule to configure the production + A production configurator for the created production, for additional configuration. + Fluent configuration interface for productions Semantic type of tokens + + + Specifies a reduction function to be performed when parsing applies the production rule + + Function that takes each of the elements in the given rule and returns a new element. Elements in + input array are ordered the same way as in the production. + Specifies a reduction function to be performed when parsing applies the production rule @@ -594,26 +2230,26 @@ Precedence level to use - + Set the error reporting function. This is only valid if the rule in question catches the Error token as predefined by the configurator. Error handler function - - - Base class of symbols in the grammar - - Semantic token value type - - + DebugName is exclusively used for debugging purposes, as the name implies. Setting the debug name gives an easier-to-read error reporting when a parser configuration fails, but it is entirely optional to set this. + + + Base class of symbols in the grammar + + Semantic token value type + A terminal symbol in the grammar. A terminal symbol may not contain production rules. @@ -622,7 +2258,7 @@ Semantic token value type - + Regular expression this terminal recognizes @@ -739,24 +2375,28 @@ - A rule which can be applied on a reduction + A rule which can be applied on a reduction. Parser value type + + + The non-terminal symbol, to which the current rule will be reduced. 
+ + - Number of tokens to pop from the parsing stack when rule is applied + Number of tokens to pop from the parsing stack when rule is applied. - The token number of the resulting symbol to push on the parse stack + The token number of the resulting symbol to push on the parse stack. - The reduction function to apply. This may also handle an exception in the case - of error recovery. The exception parameter will be null if no error has occurred. + The reduction function to apply. This may also handle an exception in the case of error recovery. The exception parameter will be null if no error has occurred. @@ -766,12 +2406,6 @@ Semantic value of symbols used in the grammar - - - Create a new reduce reduce conflict exception - - Exception message - The reduce symbol that existed in the parse table before the new reduce symbol was applied. @@ -782,19 +2416,18 @@ The reduce symbol that the parser generator tried to apply. + + + Create a new reduce reduce conflict exception + + Exception message + A shift reduce conflict exception is thrown by the parser generator when the grammar is ambiguous in such a way that the parser cannot decide if to shift another token or to reduce by a given rule. - - - - - Construct a new shift reduce exception - - Exception message @@ -806,6 +2439,11 @@ The reduce symbol in the conflict + + + Construct a new shift reduce exception + + This interface describes a Piglet generated parser. @@ -830,14 +2468,6 @@ Input string to parse The resulting semantic value symbol - - - Parse an input string, returning the resulting semantic value type that is left on the parse - stack. - - Input stream to parse - The resulting semantic value symbol - ParseExceptions are thrown when the parser detects an illegal token according to the given @@ -870,6 +2500,26 @@ The token ID of the token that was found. 
+ + + The current line number in the input text + + + + + The contents so far of the current line + + + + + The current character index inside the current line (zero-based). + + + + + The current character index in the input text (zero-based). + + Construct a new ParseException diff --git a/README.md b/README.md index 8e01030..b46ee97 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,28 @@ -Piglet, the little friendly parser and lexer tool -================================================= +# Piglet, the little friendly parser and lexer tool + +[![Build status](https://ci.appveyor.com/api/projects/status/7k5vohj4lhmdhac1?svg=true) +![Test status](http://teststatusbadge.azurewebsites.net/api/status/unknown6656/piglet)](https://ci.appveyor.com/project/Unknown6656/piglet) + +[![Build history](https://buildstats.info/appveyor/chart/unknown6656/piglet?buildCount=20&includeBuildsFromPullRequest=true&showStats=true)](https://ci.appveyor.com/project/unknown6656/piglet/history) -Piglet is a library for lexing and parsing text, in the spirit of those big parser and lexer genererators such as bison, antlr and flex. While not as feature packed as those, it is also a whole lot leaner and much easier to understand. -Mission statement ------------------ +Piglet is a library for lexing and parsing text, in the spirit of those big parser and lexer generators such as bison, antlr and flex. While not as feature packed as those, it is also a whole lot leaner and much easier to understand. 
+### Mission statement * To broaden the use of real parsing and lexer to extend to more than compiler construction * To be a dependency free library that is embeddable in your code without requiring a compile-time step * To be easy to use * To be a source of understanding of the underlying algorithms by using understandable code -Why use piglet -============== +## Why use piglet Piglet's mission in life is to fill the void where regular expressions aren't enough and a full-blown parser generator is way too much work to integrate into your project. A typical example is when you have hierarchical data, something which regular expressions cannot parse. Don't revert to hand writing parsers when Piglet can help you out! -How to use -========== - +## How to use Piglet is composed of two parts, a lexer and a parser. -Parser ------- - +### Parser Parsing is inherently a complex subject, and Piglet tries its best to make it as accessible as possible by using a fluent format that actually tells you what is going to happen. You declare rules, that themselves may contain other rules. The first rule that you define is what the entire thing must be reduced to using the other rules. @@ -91,9 +89,7 @@ int result = parser.Parse(new StringReader("7+8*2-2+2")); Assert.AreEqual(23, result); ``` -Lexer ------ - +### Lexer Sometimes you don't need a full parser, but only a tool to identify tokens. This is the sort of work that you typically do using a series of regular expressions or perhaps a lot of tryParse. A lexer is a tool for identifying tokens in a much more flexible way than doing it yourself. It is also more efficient. An example: ```csharp @@ -162,14 +158,13 @@ foreach (var token in lexer.Tokenize("up down left right right north west left n } ``` -More samples and documentation ------------------------------- +## Generic Parser +[TODO !!] 
+## More samples and documentation Piglet is quite extensively covered by integration type tests, which provide many sample uses of both the parser and the lexer. There is also the wiki here on GitHub which I hope will get filled out as this library matures. There is also a Demo project that comes with the Solution, which has a few interesting sample uses of both the lexer and parser components. -Releases --------- - +## Releases Releases are numbered in major, minor and revision number. * Major numbers are updated on major changes which are not backwards compatible. @@ -180,7 +175,7 @@ All releases are available from both NuGet, and are always represented as tags o Apart from compiling the source yourself, the easiest way to get your hands on the library is to use NuGet. Just search for Piglet, and you shall be rewarded. -# 1.4.0 +### 1.4.0 * Added thread safety to lexing and parsing. * Improved lexer usage. Tokenize is now the new preferred method of listing tokens, which is also thread safe. * Made \w and \d more consistent with MS use of the term. @@ -188,17 +183,17 @@ Apart from compiling the source yourself, the easiest way to get your hands on t * Added convenience reduction functions for common cases of reducing to a single member in tech configuration * Fixed some left over console output in error recovery -# 1.3.0 +### 1.3.0 * Piglet now supports Unicode! Piglet will now lex the full unicode character set. * You can now specify the lexer runtime, giving you more options on the speed tradeoffs of lexer construction and lexer runtime -# 1.2.2 +### 1.2.2 * Added support for ignoring expressions in fluent configuration parsing. -# 1.2.1 +### 1.2.1 * Added support for escaped characters inside character classes. 
-# 1.2.0 +### 1.2.0 * Added error recovery and reporting to fluent parser configuration * Added token precedence to fluent configuration * Completed XML documentation to include every method @@ -207,37 +202,35 @@ Apart from compiling the source yourself, the easiest way to get your hands on t * Fixed bug with possible wrong order of defined expressions for fluent configuration * Automated the NuGet package management -# 1.1.0 +### 1.1.0 * Added DFA minimization to the lexer generation algorithm. * Added public dotnotation functionality for getting debug graphs for lexers. * Unit test framework changed to NUnit. -# 1.0.1 +### 1.0.1 * Added missing Piglet.XML file to the NuGet package. Documentation should now be available in intellisense. -# 1.0.0 +### 1.0.0 * First NuGet release -Contributing ------------- - +## Contributing Contributors are welcome at any skill level! Forking the repo is probably the easiest way to get started. There is a nice list of issues, both bugs and features, that are up for grabs. Or devise a feature of your own. -Bug tracker ------------ - +## Bug tracker Please create an issue here on GitHub! https://github.com/Dervall/Piglet/issues -Authors -------- - -**Per Dervall** +## Authors +#### Per Dervall + http://twitter.com/perdervall + http://binarysculpting.com -Copyright and license ---------------------- +#### Unknown6656 ++ https://github.com/unknown6656 ++ https://twitter.com/unknown6656 ++ https://youtube.com/unknown6656 ++ https://unknown6656.com -Piglet is licenced under the MIT license. Refer to LICENSE.txt for more information. \ No newline at end of file +## Copyright and license +Piglet is licensed under the MIT license. Refer to LICENSE.txt for more information. 
diff --git a/SharedAssemblyInfo.cs b/SharedAssemblyInfo.cs index e33e470..19b3e69 100644 --- a/SharedAssemblyInfo.cs +++ b/SharedAssemblyInfo.cs @@ -1,13 +1,11 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// [assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Piglet")] -[assembly: AssemblyCopyright("Copyright © 2012")] -// [assembly: AssemblyTrademark("")] -// [assembly: AssemblyCulture("")] - - -[assembly: AssemblyVersion("1.5.0")] -[assembly: AssemblyFileVersion("1.5.0")] \ No newline at end of file +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Reflection; + +// [assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("Piglet")] +[assembly: AssemblyCopyright("Copyright © 2012")] +// [assembly: AssemblyTrademark("")] +// [assembly: AssemblyCulture("")] +[assembly: AssemblyVersion("1.6.0")] +[assembly: AssemblyFileVersion("1.6.0")] diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..b749d92 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,21 @@ +version: 1.6.{build} +image: Visual Studio 2019 +assembly_info: + patch: true + file: '**\AssemblyInfo.*' + assembly_version: '{version}' + assembly_file_version: '{version}' + assembly_informational_version: '{version}' +dotnet_csproj: + patch: true + file: '**\*.csproj' + version: '{version}' + version_prefix: '{version}' + package_version: '{version}' + assembly_version: '{version}' + file_version: '{version}' + informational_version: '{version}' +before_build: +- cmd: nuget restore +build: + verbosity: normal