Skip to content

coquelicot/PyDSL

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

37 Commits
 
 
 
 
 
 
 
 

Repository files navigation

PyDSL

A runtime DSL parser generator for python.

How to

  • Define Tokens.
lexRule = r"""#dsl
    identifier ::= /[_a-zA-Z][_a-zA-Z0-9]*/
    number ::= /[0-9]+(\\.[0-9]+)?/
    operator ::= /[+*\/-]/
"""
  • Define Rules.
parseRule = r"""#dsl
    expression ::= operand (operator operand)*
    operand ::= identifier | number
    %expand ::= operand
"""
  • Create Lexer and Parser
import DSL
lexer = DSL.makeLexer(lexRule)
parser = DSL.makeParser(parseRule)
  • Use it!!
# Read the whole input file; the `with` block closes the handle even if
# reading fails, instead of leaving it open until garbage collection.
with open("source", "r") as source_file:
    data = source_file.read()
# Lex the raw text into tokens, then parse the tokens into an AST.
tokens = lexer.parse(data)
ast = parser.parse(tokens)
# do things with ast

Candy

In many cases, you can use makeDSL instead of makeLexer and makeParser.

import DSL
dsl = DSL.makeDSL(r"""#dsl
    identifier ::= /[_a-zA-Z][_a-zA-Z0-9]*/
    number ::= /[0-9]+(\\.[0-9]+)?/
    operator ::= /[+*\/-]/
    expression ::= operand (operator operand)*
    operand ::= identifier | number
    %expand ::= operand
""")
#parser.parse(lexer.parse(data))
dsl.parse(data)

Syntax Definition

  • Lexer DSL's lexer in Lexer DSL
%keys ::= '%ignore' '%keys' '::='
comment ::= /#[^\n]*\n/
identifier ::= /[_a-zA-Z][_a-zA-Z0-9]*/
sqString ::= /'[^']*'/
dqString ::= /"[^"\\]*(\\\\.[^"\\]*)*"/
reString ::= /\/[^\/\\]*(\\\\.[^\/\\]*)*\//
%ignore ::= comment
  • Lexer DSL's parser in Parser DSL
LexRules ::= rule*
rule ::= identifier '::=' (sqString | dqString | reString)
       | '%keys' '::=' (sqString | dqString)+
       | '%ignore' '::=' (identifier | sqString | dqString)+
%ignore ::= '::='
  • Parser DSL's lexer in Lexer DSL
%keys ::= '$' '|' '::=' '(' ')' '*' '+' '?'
identifier ::= /[_a-zA-Z][_a-zA-Z0-9]*/
configType ::= /%(ignore|expandSingle|expand)/
sqString ::= /'[^']*'/
dqString ::= /"[^"\\]*(\\\\.[^"\\]*)*"/
comment ::= /#[^\n]*\n/
%ignore ::= comment
  • Parser DSL's parser in Parser DSL
ParseRules ::= rule*
rule ::= identifier '::=' alternate ('|' alternate)*
       | configType '::=' simpleItem+
alternate ::= '$' | rhsItem+
rhsItem ::= itemValue ('?' | '+' | '*')?
itemValue ::= simpleItem | '(' alternate ('|' alternate)* ')'
simpleItem ::= identifier | dqString | sqString
%ignore ::= '::=' '|' '$' '(' ')'
%expand ::= simpleItem
  • DSL DSL's lexer in Lexer DSL
%keys ::= '$' '|' '::=' '(' ')' '*' '+' '?'
identifier ::= /[_a-zA-Z][_a-zA-Z0-9]*/
sqString ::= /'[^']*'/
dqString ::= /"[^"\\]*(\\\\.[^"\\]*)*"/
reString ::= /\/[^\/\\]*(\\\\.[^\/\\]*)*\//
configType ::= /%(ignore|expandSingle|expand)/
comment ::= /#[^\n]*\n/
%ignore ::= comment
  • DSL DSL's parser in Parser DSL
DSLRules ::= rule*
rule ::= identifier '::=' reString
       | identifier '::=' alternate ('|' alternate)*
       | configType '::=' simpleItem+
alternate ::= '$' | rhsItem+
rhsItem ::= itemValue ('?' | '+' | '*')?
itemValue ::= simpleItem | '(' alternate ('|' alternate)* ')'
simpleItem ::= identifier | dqString | sqString
%ignore ::= '::=' '|' '$' '(' ')'
%expand ::= simpleItem

Examples

A simple calculator.

import DSL
import functools

lexer = DSL.makeLexer(r"""#dsl
    # It's okay to put comment here
    %keys ::= '+' '*' '(' ')'
    # Be careful!! Backslashes will be escaped twice!!
    # (and thrice if you're not using a raw string)
    # what makeLexer gets is
    #  /[0-9]+(\\.[0-9]+)?/
    # what it passes to the regex recognizer is (escape 1)
    #  [0-9]+(\.[0-9]+)?
    # the regex recognizer will regard it as (escape 2)
    #  multiple(digit) one_or_no(dot multiple(digit))
    number ::= /[0-9]+(\\.[0-9]+)?/
    # makeLexer gets
    #  /\/\\*[^\\*]*(\\*+[^*\/][^*]*)*\\*+\//
    # regex gets
    #  /\*[^\*]*(\*+[^*/][^*]*)*\*+/
    # note that there's no escape in a character set [ ]
    # so the backslash in [^\*] won't be interpreted as an escape;
    # what it means is "anything but star or backslash"
    comment ::= /\/\\*[^\\*]*(\\*+[^*\/][^*]*)*\\*+\//
    # Remove comment from token stream.
    %ignore ::= comment
""")
parser = DSL.makeParser(r"""#dsl
    # You may use brace, *, +, ? just like regex.
    exprAdd ::= exprMul ('+' exprMul)*
    exprMul ::= term ('*' term)*
    term ::= '(' exprAdd ')' | number
    # Remove them from AST
    %ignore ::= '(' ')' '+' '*'
    # Expand the node if it has only one child
    %expandSingle ::= exprAdd exprMul
    # Always expand the node
    %expand ::= term
""")

def evaluateAST(ast):
    """Recursively compute the numeric value of a calculator AST node.

    A node named 'number' is a leaf: its ``value`` text is converted to a
    float when it contains a '.', otherwise to an int. Any other node is
    folded over the evaluated results of the nodes in ``ast.child``:
    with addition when the node is named 'exprAdd', with multiplication
    for every other name.
    """
    kind = ast.name

    # Leaf: turn the token text into a Python number.
    if kind == 'number':
        text = ast.value
        if '.' in text:
            return float(text)
        return int(text)

    def add(left, right):
        return left + right

    def multiply(left, right):
        return left * right

    # Inner node: pick the operator from the node name, then fold the
    # evaluated children from left to right.
    combine = add if kind == 'exprAdd' else multiply
    return functools.reduce(combine, (evaluateAST(node) for node in ast.child))

data = """
    /* The result should be 32.5 */
    1+(2.3+4)*5
"""
tokens = lexer.parse(data)
ast = parser.parse(tokens)
print(evaluateAST(ast))

Json-like parser.

import DSL

# Build the lexer and parser for a JSON-like language in a single makeDSL call.
jsonDSL = DSL.makeDSL(r"""#dsl
    # Remember, it will be escaped twice
    # makeDSL gets
    #  /"[^"\\]*(\\\\.[^"\\]*)*"/
    # regex gets
    #  "[^"\]*(\\.[^"\]*)*"
    # a backslash will be interpreted as an escape as long as it's not in []
    # hence, the double backslash will be interpreted as "a backslash"
    # the meaning of the regex will be
    # " many_or_no(except " \) many_or_no(backslash anychar many_or_no(except " \)) "
    string ::= /"[^"\\]*(\\\\.[^"\\]*)*"/
    # one or more fraction digits, so values such as 3.14 lex as a single number
    number ::= /[0-9]+(\\.[0-9]+)?/

    object ::= '{' (kvPair (',' kvPair)*)? '}' # Nested brace!!
    kvPair ::= string ':' value
    array ::= '[' (value (',' value)*)? ']'
    value ::= string | number | object | array | 'true' | 'false' | 'null'

    %ignore ::= '{' '}' '[' ']' ',' ':'
    %expand ::= value
""")

data = r"""
{
    "key1" : {
        "key2" : [1, 2, 3, 4],
        "key3" : [
            {},
            { "key4" : "value" }
        ]
    },
    "key5" : null,
    "key6" : [
        [1, 2, 3],
        [4.4, 5.5, 6.6],
        ["string", 8, 9.9],
        true
    ]
}
"""
print(jsonDSL.parse(data))

About

A runtime DSL parser generator for python.

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published