Parses ABNF with a parser written with nimble_parsec
, emits parser consists of nimble_parsec
combinators.
- Brevity - flattens unnecessary nesting in parsed ABNF
- Easy to config and customize
- Full test coverage
The package is available on Hex
and can be installed by adding abnf_parsec
to your list of dependencies in mix.exs
:
def deps do
[
{:abnf_parsec, "~> 1.0", runtime: false}
]
end
Documentation can be generated with ExDoc and published on HexDocs. Once published, the docs can be found at https://hexdocs.pm/abnf_parsec.
defmodule IPv4Parser do
use AbnfParsec,
abnf: """
ip = first dot second dot third dot fourth
dot = "."
dec-octet =
"25" %x30-35 / ; 250-255
"2" %x30-34 DIGIT / ; 200-249
"1" 2DIGIT / ; 100-199
%x31-39 DIGIT / ; 10-99
DIGIT ; 0-9
first = dec-octet
second = dec-octet
third = dec-octet
fourth = dec-octet
""",
unbox: ["dec-octet"],
ignore: ["dot"],
parse: :ip
end
# IPv4Parser.ip("192.168.0.1")
# IPv4Parser.parse("127.0.0.1")
# IPv4Parser.parse!("10.0.0.1")
iex> IPv4Parser.parse! "10.0.0.1"
[ip: [first: '10', second: '0', third: '0', fourth: '1']]
defmodule JsonParser do
use AbnfParsec,
abnf_file: "test/fixture/json.abnf",
parse: :json_text,
transform: %{
"string" => {:reduce, {List, :to_string, []}},
"int" => [{:reduce, {List, :to_string, []}}, {:map, {String, :to_integer, []}}],
"frac" => {:reduce, {List, :to_string, []}},
"null" => {:replace, nil},
"true" => {:replace, true},
"false" => {:replace, false}
},
untag: ["member"],
unwrap: ["int", "frac"],
unbox: [
"JSON-text",
"null",
"true",
"false",
"digit1-9",
"decimal-point",
"escape",
"unescaped",
"char"
],
ignore: [
"name-separator",
"value-separator",
"quotation-mark",
"begin-object",
"end-object",
"begin-array",
"end-array"
]
end
json = """
{"a": {"b": 1, "c": [true]}, "d": null, "e": "e\\te"}
"""
# JsonParser.json_text(json)
# JsonParser.parse(json)
# JsonParser.parse!(json)
iex> JsonParser.parse! """
...> {"a": {"b": 1, "c": [true]}, "d": null, "e": "e\\te"}
...> """
[
object: [
[
string: ["a"],
value: [
object: [
[string: ["b"], value: [number: [int: 1]]],
[string: ["c"], value: [array: [value: [true]]]]
]
]
],
[string: ["d"], value: [nil]],
[string: ["e"], value: [string: ["e\\te"]]]
]
]
For more details of options for customization, see abnf_parsec.ex
For example, ABNF of ABNF 😉 is parsed
rulelist = 1*( rule / (*c-wsp c-nl) )
rule = rulename defined-as elements c-nl
; continues if next line starts
; with white space
rulename = ALPHA *(ALPHA / DIGIT / "-")
defined-as = *c-wsp ("=" / "=/") *c-wsp
; basic rules definition and
; incremental alternatives
elements = alternation *c-wsp
c-wsp = WSP / (c-nl WSP)
c-nl = comment / CRLF
; comment or newline
comment = ";" *(WSP / VCHAR) CRLF
alternation = concatenation
*(*c-wsp "/" *c-wsp concatenation)
concatenation = repetition *(1*c-wsp repetition)
repetition = [repeat] element
repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
element = rulename / group / option /
char-val / num-val / prose-val
group = "(" *c-wsp alternation *c-wsp ")"
option = "[" *c-wsp alternation *c-wsp "]"
char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
; quoted string of SP and VCHAR
; without DQUOTE
num-val = "%" (bin-val / dec-val / hex-val)
bin-val = "b" 1*BIT
[ 1*("." 1*BIT) / ("-" 1*BIT) ]
; series of concatenated bit values
; or single ONEOF range
dec-val = "d" 1*DIGIT
[ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
hex-val = "x" 1*HEXDIG
[ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
prose-val = "<" *(%x20-3D / %x3F-7E) ">"
; bracketed string of SP and VCHAR
; without angles
; prose description, to be used as
; last resort
into
[
rule: [
rulename: "rulelist",
repetition: [
repeat: [min: 1],
alternation: [
rulename: "rule",
concatenation: [
repetition: [repeat: [], rulename: "c-wsp"],
rulename: "c-nl"
]
]
]
],
rule: [
rulename: "rule",
concatenation: [
rulename: "rulename",
rulename: "defined-as",
rulename: "elements",
rulename: "c-nl"
],
comment: "continues if next line starts",
comment: "with white space"
],
rule: [
rulename: "rulename",
concatenation: [
rulename: "ALPHA",
repetition: [
repeat: [],
alternation: [{:rulename, "ALPHA"}, {:rulename, "DIGIT"}, "-"]
]
]
],
rule: [
rulename: "defined-as",
concatenation: [
repetition: [repeat: [], rulename: "c-wsp"],
alternation: ["=", "=/"],
repetition: [repeat: [], rulename: "c-wsp"]
],
comment: "basic rules definition and",
comment: "incremental alternatives"
],
rule: [
rulename: "elements",
concatenation: [
rulename: "alternation",
repetition: [repeat: [], rulename: "c-wsp"]
]
],
rule: [
rulename: "c-wsp",
alternation: [
rulename: "WSP",
concatenation: [rulename: "c-nl", rulename: "WSP"]
]
],
rule: [
rulename: "c-nl",
alternation: [rulename: "comment", rulename: "CRLF"],
comment: "comment or newline"
],
rule: [
rulename: "comment",
concatenation: [
";",
{:repetition,
[repeat: [], alternation: [rulename: "WSP", rulename: "VCHAR"]]},
{:rulename, "CRLF"}
]
],
rule: [
rulename: "alternation",
concatenation: [
rulename: "concatenation",
repetition: [
repeat: [],
concatenation: [
{:repetition, [repeat: [], rulename: "c-wsp"]},
"/",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "concatenation"}
]
]
]
],
rule: [
rulename: "concatenation",
concatenation: [
rulename: "repetition",
repetition: [
repeat: [],
concatenation: [
repetition: [repeat: [min: 1], rulename: "c-wsp"],
rulename: "repetition"
]
]
]
],
rule: [
rulename: "repetition",
concatenation: [option: [rulename: "repeat"], rulename: "element"]
],
rule: [
rulename: "repeat",
alternation: [
repetition: [repeat: [min: 1], rulename: "DIGIT"],
concatenation: [
{:repetition, [repeat: [], rulename: "DIGIT"]},
"*",
{:repetition, [repeat: [], rulename: "DIGIT"]}
]
]
],
rule: [
rulename: "element",
alternation: [
rulename: "rulename",
rulename: "group",
rulename: "option",
rulename: "char-val",
rulename: "num-val",
rulename: "prose-val"
]
],
rule: [
rulename: "group",
concatenation: [
"(",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "alternation"},
{:repetition, [repeat: [], rulename: "c-wsp"]},
")"
]
],
rule: [
rulename: "option",
concatenation: [
"[",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "alternation"},
{:repetition, [repeat: [], rulename: "c-wsp"]},
"]"
]
],
rule: [
rulename: "char-val",
concatenation: [
rulename: "DQUOTE",
repetition: [
repeat: [],
alternation: [
num_range: [{:base, "x"}, "20", "21"],
num_range: [{:base, "x"}, "23", "7E"]
]
],
rulename: "DQUOTE"
],
comment: "quoted string of SP and VCHAR",
comment: "without DQUOTE"
],
rule: [
rulename: "num-val",
concatenation: [
"%",
{:alternation,
[rulename: "bin-val", rulename: "dec-val", rulename: "hex-val"]}
]
],
rule: [
rulename: "bin-val",
concatenation: [
"b",
{:repetition, [repeat: [min: 1], rulename: "BIT"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "BIT"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "BIT"]}
]
]
]}
],
comment: "series of concatenated bit values",
comment: "or single ONEOF range"
],
rule: [
rulename: "dec-val",
concatenation: [
"d",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]}
]
]
]}
]
],
rule: [
rulename: "hex-val",
concatenation: [
"x",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]}
]
]
]}
]
],
rule: [
rulename: "prose-val",
concatenation: [
"<",
{:repetition,
[
repeat: [],
alternation: [
num_range: [{:base, "x"}, "20", "3D"],
num_range: [{:base, "x"}, "3F", "7E"]
]
]},
">"
],
comment: "bracketed string of SP and VCHAR",
comment: "without angles",
comment: "prose description, to be used as",
comment: "last resort"
]
]
And generated parser looks like:
[
defparsec(
:rulelist,
tag(
times(choice([parsec(:rule), repeat(parsec(:c_wsp)) |> parsec(:c_nl)]), min: 1),
:rulelist
)
),
defparsec(
:rule,
tag(parsec(:rulename) |> parsec(:defined_as) |> parsec(:elements) |> parsec(:c_nl), :rule)
),
defparsec(
:rulename,
tag(
parsec(:core_alpha)
|> repeat(choice([parsec(:core_alpha), parsec(:core_digit), string("-")])),
:rulename
)
),
defparsec(
:defined_as,
tag(
repeat(parsec(:c_wsp)) |> choice([string("="), string("=/")]) |> repeat(parsec(:c_wsp)),
:defined_as
)
),
defparsec(:elements, tag(parsec(:alternation) |> repeat(parsec(:c_wsp)), :elements)),
defparsec(:c_wsp, tag(choice([parsec(:core_wsp), parsec(:c_nl) |> parsec(:core_wsp)]), :c_wsp)),
defparsec(:c_nl, tag(choice([parsec(:comment), parsec(:core_crlf)]), :c_nl)),
defparsec(
:comment,
tag(
string(";")
|> repeat(choice([parsec(:core_wsp), parsec(:core_vchar)]))
|> parsec(:core_crlf),
:comment
)
),
defparsec(
:alternation,
tag(
parsec(:concatenation)
|> repeat(
repeat(parsec(:c_wsp))
|> string("/")
|> repeat(parsec(:c_wsp))
|> parsec(:concatenation)
),
:alternation
)
),
defparsec(
:concatenation,
tag(
parsec(:repetition) |> repeat(times(parsec(:c_wsp), min: 1) |> parsec(:repetition)),
:concatenation
)
),
defparsec(:repetition, tag(optional(parsec(:repeat)) |> parsec(:element), :repetition)),
defparsec(
:repeat,
tag(
choice([
times(parsec(:core_digit), min: 1),
repeat(parsec(:core_digit)) |> string("*") |> repeat(parsec(:core_digit))
]),
:repeat
)
),
defparsec(
:element,
tag(
choice([
parsec(:rulename),
parsec(:group),
parsec(:option),
parsec(:char_val),
parsec(:num_val),
parsec(:prose_val)
]),
:element
)
),
defparsec(
:group,
tag(
string("(")
|> repeat(parsec(:c_wsp))
|> parsec(:alternation)
|> repeat(parsec(:c_wsp))
|> string(")"),
:group
)
),
defparsec(
:option,
tag(
string("[")
|> repeat(parsec(:c_wsp))
|> parsec(:alternation)
|> repeat(parsec(:c_wsp))
|> string("]"),
:option
)
),
defparsec(
:char_val,
tag(
parsec(:core_dquote)
|> repeat(choice([ascii_char([32..33]), ascii_char([35..126])]))
|> parsec(:core_dquote),
:char_val
)
),
defparsec(
:num_val,
tag(string("%") |> choice([parsec(:bin_val), parsec(:dec_val), parsec(:hex_val)]), :num_val)
),
defparsec(
:bin_val,
tag(
string("b")
|> times(parsec(:core_bit), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_bit), min: 1), min: 1),
string("-") |> times(parsec(:core_bit), min: 1)
])
),
:bin_val
)
),
defparsec(
:dec_val,
tag(
string("d")
|> times(parsec(:core_digit), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_digit), min: 1), min: 1),
string("-") |> times(parsec(:core_digit), min: 1)
])
),
:dec_val
)
),
defparsec(
:hex_val,
tag(
string("x")
|> times(parsec(:core_hexdig), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_hexdig), min: 1), min: 1),
string("-") |> times(parsec(:core_hexdig), min: 1)
])
),
:hex_val
)
),
defparsec(
:prose_val,
tag(
string("<") |> repeat(choice([ascii_char([32..61]), ascii_char([63..126])])) |> string(">"),
:prose_val
)
)
]