diff --git a/src/kinds.jl b/src/kinds.jl index f6706dd2..bf837716 100644 --- a/src/kinds.jl +++ b/src/kinds.jl @@ -1,7 +1,194 @@ # Definition of Kind type - mapping from token string identifiers to # enumeration values as used in @K_str -const _kind_names = -[ + +""" + K"name" + Kind(namestr) + +`Kind` is a type tag for specifying the type of tokens and interior nodes of +a syntax tree. Abstractly, this tag is used to define our own *sum types* for +syntax tree nodes. We do this explicitly outside the Julia type system because +(a) Julia doesn't have sum types and (b) we want concrete data structures which +are unityped from the Julia compiler's point of view, for efficiency. + +Naming rules: +* Kinds which correspond to exactly one textual form are represented with that + text. This includes keywords like K"for" and operators like K"*". +* Kinds which represent many textual forms have UpperCamelCase names. This + includes kinds like K"Identifier" and K"Comment". +* Kinds which exist merely as delimiters are all uppercase +""" +primitive type Kind 16 end + +# The implementation of Kind here is basically similar to @enum. However we use +# the K_str macro to self-name these kinds with their literal representation, +# rather than needing to invent a new name for each. 
+ +const _kind_str_to_int = Dict{String,UInt16}() +const _kind_int_to_str = Dict{UInt16,String}() +const _kind_modules = Dict{Int,Union{Symbol,Module}}( + 0=>:JuliaSyntax, + 1=>:JuliaLowering, + 2=>:JuliaSyntaxFormatter +) +# Number of bits reserved for kind id's belonging to a single module +const _kind_nbits = 10 +const _kind_module_id_max = typemax(UInt16) >> _kind_nbits + +function Kind(x::Integer) + if x < 0 || x > typemax(UInt16) + throw(ArgumentError("Kind out of range: $x")) + end + return Base.bitcast(Kind, convert(UInt16, x)) +end + +function Base.convert(::Type{String}, k::Kind) + _kind_int_to_str[reinterpret(UInt16, k)] +end + +function Base.convert(::Type{Kind}, s::AbstractString) + i = get(_kind_str_to_int, s) do + error("unknown Kind name $(repr(s))") + end + Kind(i) +end + +Base.string(x::Kind) = convert(String, x) +Base.print(io::IO, x::Kind) = print(io, convert(String, x)) + +Base.isless(x::Kind, y::Kind) = reinterpret(UInt16, x) < reinterpret(UInt16, y) + +function Base.show(io::IO, k::Kind) + print(io, "K\"$(convert(String, k))\"") +end + +# Save the string representation rather than the bit pattern so that kinds +# can be serialized and deserialized across different JuliaSyntax versions. 
+function Base.write(io::IO, k::Kind) + str = convert(String, k) + write(io, UInt8(sizeof(str))) + write(io, str) # length prefix counts bytes, since Base.read consumes `len` bytes +end +function Base.read(io::IO, ::Type{Kind}) + len = read(io, UInt8) + str = String(read(io, len)) + convert(Kind, str) +end + +function Base.parentmodule(k::Kind) + mod_id = reinterpret(UInt16, k) >> _kind_nbits + _kind_modules[mod_id]::Module +end + +function _register_kinds!(kind_modules, int_to_kindstr, kind_str_to_int, mod, module_id, names) + if module_id > _kind_module_id_max + error("Kind module id $module_id is out of range") + elseif length(names) >= 1 << _kind_nbits + error("Too many kind names") + elseif !haskey(kind_modules, module_id) + kind_modules[module_id] = mod + else + m = kind_modules[module_id] + if m == nameof(mod) + # Ok: known kind module, but not loaded until now + kind_modules[module_id] = mod + elseif m == mod + existing_kinds = [(i = get(kind_str_to_int, n, nothing); + isnothing(i) ? nothing : Kind(i)) for n in names] + if any(isnothing, existing_kinds) || + !issorted(existing_kinds) || + any(k->parentmodule(k) != mod, existing_kinds) + error("Error registering kinds for module $mod (register_kinds() called more than once inconsistently, or conflict with existing module kinds?)") + else + # Assume we're re-registering kinds as in top level vs `__init__` + return + end + else + error("Kind module ID $module_id already claimed by module $m") + end + end + # Process names to conflate category BEGIN/END markers with the first/last + # in the category. + i = 0 + for name in names + normal_kind = false + if startswith(name, "BEGIN_") + j = i + elseif startswith(name, "END_") + j = i - 1 + else + normal_kind = true + j = i + i += 1 + end + kind_int = (module_id << _kind_nbits) | j + push!(kind_str_to_int, name=>kind_int) + if normal_kind + push!(int_to_kindstr, kind_int=>name) + end + end +end + +""" + register_kinds!(mod, module_id, names) + +Register custom `Kind`s with the given `names`, belonging to a module `mod`. 
+`names` is an array of arbitrary strings. + +In order for kinds to be represented by a small number of bits, some nontrivial +cooperation is required between modules using custom kinds: +* The integer `module_id` is globally unique for each `mod` which will be used + together, and not larger than $_kind_module_id_max. +* No two modules register the same `name`. The semantics of a given `kind` name + should be defined by the module which owns it. + +To allow ranges of kinds to be delimited and quickly tested for, some special +names are allowed: `BEGIN_section` and `END_section` pairs are detected, and +alias the next and previous kind id's respectively so that kinds in `section` +can be tested with `BEGIN_section <= k <= END_section`. +""" +function register_kinds!(mod, module_id, names) + _register_kinds!(_kind_modules, _kind_int_to_str, _kind_str_to_int, mod, module_id, names) +end + +#------------------------------------------------------------------------------- + +""" + K"s" + +The kind of a token or AST internal node with string "s". + +For example +* K")" is the kind of the right parenthesis token +* K"block" is the kind of a block of code (eg, statements within a begin-end). +""" +macro K_str(s) + convert(Kind, s) +end + +""" +A set of kinds which can be used with the `in` operator. For example + + k in KSet"+ - *" +""" +macro KSet_str(str) + kinds = [convert(Kind, s) for s in split(str)] + + quote + ($(kinds...),) + end +end + +""" + kind(x) + +Return the `Kind` of `x`. 
+""" +kind(k::Kind) = k + + +#------------------------------------------------------------------------------- +# Kinds used by JuliaSyntax +register_kinds!(JuliaSyntax, 0, [ "None" # Placeholder; never emitted by lexer "EndMarker" # EOF "Comment" @@ -918,133 +1105,7 @@ const _kind_names = # Container for a single statement/atom plus any trivia and errors "wrapper" "END_SYNTAX_KINDS" -] - -""" - K"name" - Kind(id) - -`Kind` is a type tag for specifying the type of tokens and interior nodes of -a syntax tree. Abstractly, this tag is used to define our own *sum types* for -syntax tree nodes. We do this explicitly outside the Julia type system because -(a) Julia doesn't have sum types and (b) we want concrete data structures which -are unityped from the Julia compiler's point of view, for efficiency. - -Naming rules: -* Kinds which correspond to exactly one textural form are represented with that - text. This includes keywords like K"for" and operators like K"*". -* Kinds which represent many textural forms have UpperCamelCase names. This - includes kinds like K"Identifier" and K"Comment". -* Kinds which exist merely as delimiters are all uppercase -""" -primitive type Kind 16 end - -# The implementation of Kind here is basically similar to @enum. However we use -# the K_str macro to self-name these kinds with their literal representation, -# rather than needing to invent a new name for each. - -let kind_int_type = :UInt16 - # Preprocess _kind_names to conflate category markers with the first/last - # in the category. 
- kindstr_to_int = Dict{String,UInt16}() - i = 1 - while i <= length(_kind_names) - kn = _kind_names[i] - kind_int = i-1 - if startswith(kn, "BEGIN_") - deleteat!(_kind_names, i) - elseif startswith(kn, "END_") - kind_int = i-2 - deleteat!(_kind_names, i) - else - i += 1 - end - push!(kindstr_to_int, kn=>kind_int) - end - - max_kind_int = length(_kind_names)-1 - - @eval begin - function Kind(x::Integer) - if x < 0 || x > $max_kind_int - throw(ArgumentError("Kind out of range: $x")) - end - return Base.bitcast(Kind, convert($kind_int_type, x)) - end - - Base.convert(::Type{String}, k::Kind) = _kind_names[1 + reinterpret($kind_int_type, k)] - - let kindstr_to_int=$kindstr_to_int - function Base.convert(::Type{Kind}, s::AbstractString) - i = get(kindstr_to_int, s) do - error("unknown Kind name $(repr(s))") - end - Kind(i) - end - end - - Base.string(x::Kind) = convert(String, x) - Base.print(io::IO, x::Kind) = print(io, convert(String, x)) - - Base.typemin(::Type{Kind}) = Kind(0) - Base.typemax(::Type{Kind}) = Kind($max_kind_int) - - Base.:<(x::Kind, y::Kind) = reinterpret($kind_int_type, x) < reinterpret($kind_int_type, y) - - Base.instances(::Type{Kind}) = (Kind(i) for i in reinterpret($kind_int_type, typemin(Kind)):reinterpret($kind_int_type, typemax(Kind))) - end -end - -function Base.show(io::IO, k::Kind) - print(io, "K\"$(convert(String, k))\"") -end - -# Save the string representation rather than the bit pattern so that kinds -# can be serialized and deserialized across different JuliaSyntax versions. -function Base.write(io::IO, k::Kind) - str = convert(String, k) - write(io, UInt8(length(str))) + write(io, str) -end -function Base.read(io::IO, ::Type{Kind}) - len = read(io, UInt8) - str = String(read(io, len)) - convert(Kind, str) -end - -#------------------------------------------------------------------------------- - -""" - K"s" - -The kind of a token or AST internal node with string "s". 
- -For example -* K")" is the kind of the right parenthesis token -* K"block" is the kind of a block of code (eg, statements within a begin-end). -""" -macro K_str(s) - convert(Kind, s) -end - -""" -A set of kinds which can be used with the `in` operator. For example - - k in KSet"+ - *" -""" -macro KSet_str(str) - kinds = [convert(Kind, s) for s in split(str)] - - quote - ($(kinds...),) - end -end - -""" - kind(x) - -Return the `Kind` of `x`. -""" -kind(k::Kind) = k +]) #------------------------------------------------------------------------------- const _nonunique_kind_names = Set([ diff --git a/test/kinds.jl b/test/kinds.jl new file mode 100644 index 00000000..f58fbd80 --- /dev/null +++ b/test/kinds.jl @@ -0,0 +1,59 @@ +# Only test this once per session, as kind modules must be unique (ugh) +if !isdefined(@__MODULE__, :FooKinds) +@eval module FooKinds + +using JuliaSyntax + +function _init_kinds() + JuliaSyntax.register_kinds!(@__MODULE__, 42, [ + "BEGIN_FOO" + "foo_1" + "foo_2" + "BEGIN_FOOBAR" + "foobar_1" + "foobar_2" + "END_FOOBAR" + "END_FOO" + ]) +end + +_init_kinds() + +k_before_init = K"foo_1" + +function __init__() + _init_kinds() +end + +end + +@eval module BarKinds + # Intentionally empty +end + +end + +@testset "Kinds" begin + @test K"foo_1" != K"foo_2" + + @test FooKinds.k_before_init == K"foo_1" + + @test K"BEGIN_FOO" == K"foo_1" + @test K"foo_2" < K"BEGIN_FOOBAR" + @test K"BEGIN_FOOBAR" == K"foobar_1" + @test K"END_FOOBAR" == K"foobar_2" + @test K"END_FOO" == K"foobar_2" + + @test parentmodule(K"foo_1") == FooKinds + @test sprint(show, K"foo_1") == "K\"foo_1\"" + + # Too many kind modules + @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 64, ["hoo?"]) + # Too many kind names per module + @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 42, string.(1:1024)) + # Re-registering or registering new kinds is not supported + @test_throws ErrorException JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_2", "foo_1"]) + 
@test_throws ErrorException JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_3"]) + # Module ID already taken by FooKinds + @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 42, ["hii?"]) +end diff --git a/test/runtests.jl b/test/runtests.jl index 317f993d..0fc08d08 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,6 +12,7 @@ include("test_utils_tests.jl") include("fuzz_test.jl") include("utils.jl") +include("kinds.jl") @testset "Tokenize" begin include("tokenize.jl")