Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add parameters #9

Merged
merged 10 commits into from
Aug 11, 2023
3 changes: 3 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ uuid = "7d259134-7f60-4bf1-aa00-7452e11bde56"
authors = ["Jim Pivarski <pivarski@princeton.edu>", "Jerry Ling <jerry.ling@cern.ch>", "and contributors"]
version = "1.0.0-DEV"

[deps]
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"

[compat]
julia = "1.8"

Expand Down
193 changes: 179 additions & 14 deletions src/AwkwardArray.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,93 @@
# using JSON

module AwkwardArray

### Index ################################################################

Index8 = AbstractArray{Int8,1}
IndexU8 = AbstractArray{UInt8,1}
Index32 = AbstractArray{Int32,1}
IndexU32 = AbstractArray{UInt32,1}
Index64 = AbstractArray{Int64,1}
IndexBig = Union{Index32,IndexU32,Index64}
# Type aliases for one-dimensional integer buffers. Each alias is the abstract
# vector type for one integer element type, so any `AbstractVector` with that
# element type matches.
const Index8 = AbstractVector{Int8}
const IndexU8 = AbstractVector{UInt8}
const Index32 = AbstractVector{Int32}
const IndexU32 = AbstractVector{UInt32}
const Index64 = AbstractVector{Int64}
# Union of the Int32, UInt32 and Int64 vector types; used in this file as the
# upper bound of the `INDEX` type parameter of `ListOffsetArray`.
const IndexBig = Union{Index32,IndexU32,Index64}

### Parameters ###########################################################

# Behavior tags: each name is bound to the Symbol of the same spelling.
# They are declared `const` so the bindings have a fixed type and cannot be
# silently rebound; a non-const module global is untyped for the compiler.
const default = :default
const char = :char
const byte = :byte
# NOTE(review): inside this module the bare name `string` refers to this Symbol,
# not to `Base.string`; write `Base.string(...)` if the function is needed here.
const string = :string
const bytestring = :bytestring
const categorical = :categorical
const sorted_map = :sorted_map

"""
    Parameters()
    Parameters(pairs::Pair{String,<:Any}...)

Immutable set of key/value parameters.

Values of type `String` are kept in `string_valued`; every other value is kept in
`any_valued`. Build or extend instances with [`with_parameter`](@ref) so that each
key is stored exactly once across the two dictionaries.
"""
struct Parameters
    string_valued::Base.ImmutableDict{String,String}
    any_valued::Base.ImmutableDict{String,Any}
end

# Empty parameter set: both dictionaries start out empty.
Parameters() =
    Parameters(Base.ImmutableDict{String,String}(), Base.ImmutableDict{String,Any}())

# Build a parameter set by applying `with_parameter` to each pair, left to right,
# so a later pair overrides an earlier pair with the same key.
function Parameters(pairs::Vararg{Pair{String,<:Any}})
    return foldl(with_parameter, pairs; init = Parameters())
end

# Return `dict` without any entry for `key` (and without shadowed duplicates of the
# remaining keys). `Base.ImmutableDict` cannot delete, so the dictionary is rebuilt;
# when `key` is absent the original object is returned untouched.
function _drop_key(dict::Base.ImmutableDict{K,V}, key) where {K,V}
    haskey(dict, key) || return dict
    seen = Set{K}()
    kept = Pair{K,V}[]
    # Iteration runs newest entry first, so the first occurrence of a key is the
    # effective one and later occurrences are shadowed leftovers.
    for (k, v) in dict
        (isequal(k, key) || k in seen) && continue
        push!(seen, k)
        push!(kept, Pair{K,V}(k, v))
    end
    rebuilt = Base.ImmutableDict{K,V}()
    # Re-insert oldest first to preserve the original relative order.
    for pair in Iterators.reverse(kept)
        rebuilt = Base.ImmutableDict(rebuilt, pair)
    end
    return rebuilt
end

"""
    with_parameter(parameters::Parameters, pair::Pair{String,<:Any})

Return a new `Parameters` equal to `parameters` plus `pair`; `parameters` itself is
not modified.

A `String` value goes into `string_valued`, any other value into `any_valued`. Any
previous entry for the same key is removed from both dictionaries first, so the
new value always wins regardless of the type of the value it replaces.
"""
with_parameter(parameters::Parameters, pair::Pair{String,String}) = Parameters(
    Base.ImmutableDict(_drop_key(parameters.string_valued, pair.first), pair),
    _drop_key(parameters.any_valued, pair.first),
)

with_parameter(parameters::Parameters, pair::Pair{String,<:Any}) = Parameters(
    _drop_key(parameters.string_valued, pair.first),
    Base.ImmutableDict(_drop_key(parameters.any_valued, pair.first), pair),
)

"""
    has_parameter(parameters::Parameters, key::String)

Return `true` when `key` is present in either the string-valued or the any-valued
dictionary of `parameters`, and `false` otherwise.
"""
function has_parameter(parameters::Parameters, key::String)
    return haskey(parameters.string_valued, key) || haskey(parameters.any_valued, key)
end

"""
    get_parameter(parameters::Parameters, key::String)

Look up `key`, consulting `string_valued` first and `any_valued` second, and return
the stored value. Return `nothing` when neither dictionary has the key.
"""
function get_parameter(parameters::Parameters, key::String)
    for store in (parameters.string_valued, parameters.any_valued)
        haskey(store, key) && return store[key]
    end
    return nothing
end

# Number of distinct parameter keys.
#
# `Base.ImmutableDict` keeps shadowed entries, and its `length` counts them, so
# adding up the two dictionary lengths would count an overwritten key (or a key
# present in both dictionaries) more than once. Counting unique keys avoids that.
function Base.length(parameters::Parameters)
    distinct = Set{String}(keys(parameters.string_valued))
    union!(distinct, keys(parameters.any_valued))
    return length(distinct)
end

# Print `parameters` as `Parameters("key" => value, ...)`, using `repr` for both
# keys and values. Entries come from merging `string_valued` over `any_valued`,
# so the order is that of the merged dictionary.
# NOTE(review): if a key was overwritten inside one of the ImmutableDicts, the
# merged dictionary may end up holding the older value — confirm.
function Base.show(io::IO, parameters::Parameters)
    combined = merge(parameters.any_valued, parameters.string_valued)
    rendered = ["$(repr(entry[1])) => $(repr(entry[2]))" for entry in combined]
    print(io, "Parameters(")
    join(io, rendered, ", ")
    print(io, ")")
    return nothing
end

### Content ##############################################################

abstract type Content <: AbstractArray{T where T,1} end
# Placeholder type: `Unset()` is the default of the keyword arguments of the
# `copy` methods in this file, where it means "keep the value from the layout".
struct Unset end

# Abstract supertype of the layout types in this file. Concrete layouts fill the
# `BEHAVIOR` type parameter with their `behavior` Symbol, which lets methods
# dispatch on it (see the `:string` / `:bytestring` `getindex` methods).
abstract type Content{BEHAVIOR} <: AbstractVector{ITEM where ITEM} end

# Layout-level convenience: ask the layout's `parameters` field whether `key` is set.
function has_parameter(content::Content, key::String)
    return has_parameter(content.parameters, key)
end

# Layout-level convenience: look `key` up in the layout's `parameters` field and
# return whatever the `Parameters` lookup returns.
function get_parameter(content::Content, key::String)
    return get_parameter(content.parameters, key)
end

function Base.iterate(layout::Content)
start = firstindex(layout)
Expand All @@ -36,11 +112,40 @@ Base.size(layout::Content) = (length(layout),)

### PrimitiveArray #######################################################

struct PrimitiveArray{T,ARRAY<:AbstractArray{T,1}} <: Content
data::ARRAY
# Layout that wraps a one-dimensional buffer `data` of `ITEM` elements together
# with its `parameters`. The `BEHAVIOR` type parameter is taken from the
# `behavior` keyword of the constructor; it is not stored as a field.
struct PrimitiveArray{ITEM,BUFFER<:AbstractVector{ITEM},BEHAVIOR} <: Content{BEHAVIOR}
data::BUFFER
parameters::Parameters
# Inner constructor: ITEM and BUFFER are inferred from `data`; `parameters`
# defaults to an empty `Parameters()` and `behavior` to `:default`.
PrimitiveArray(
data::BUFFER;
parameters::Parameters = Parameters(),
behavior::Symbol = :default,
) where {ITEM,BUFFER<:AbstractVector{ITEM}} =
new{ITEM,BUFFER,behavior}(data, parameters)
end

PrimitiveArray{T}() where {T} = PrimitiveArray(Vector{T}([]))
# Create an empty `PrimitiveArray` whose buffer is a fresh `Vector{ITEM}`.
# `parameters` and `behavior` are forwarded unchanged to the main constructor.
# The buffer is written as the typed literal `ITEM[]` so no intermediate untyped
# `Any[]` is allocated and converted.
PrimitiveArray{ITEM}(;
    parameters::Parameters = Parameters(),
    behavior::Symbol = :default,
) where {ITEM} = PrimitiveArray(ITEM[], parameters = parameters, behavior = behavior)

"""
    copy(layout::PrimitiveArray; data, parameters, behavior)

Build a new `PrimitiveArray` from `layout`, replacing only the pieces passed as
keywords. A keyword left at its default `Unset()` keeps the corresponding value of
`layout`; the buffer is passed along as is, not duplicated.

# Keywords
- `data`: replacement buffer (must have the same buffer type as `layout.data`).
- `parameters`: replacement `Parameters`.
- `behavior`: replacement behavior `Symbol`.
"""
function copy(
    layout::PrimitiveArray{ITEM,BUFFER,BEHAVIOR};
    data::Union{Unset,BUFFER} = Unset(),
    parameters::Union{Unset,Parameters} = Unset(),
    behavior::Union{Unset,Symbol} = Unset(),
) where {ITEM,BUFFER<:AbstractVector{ITEM},BEHAVIOR}
    return PrimitiveArray(
        data isa Unset ? layout.data : data,
        parameters = parameters isa Unset ? layout.parameters : parameters,
        # BEHAVIOR is the static type parameter bound by the signature, so there is
        # no need to reflect on `typeof(layout).parameters`.
        behavior = behavior isa Unset ? BEHAVIOR : behavior,
    )
end

is_valid(layout::PrimitiveArray) = true
Base.length(layout::PrimitiveArray) = length(layout.data)
Expand All @@ -59,20 +164,57 @@ function Base.:(==)(layout1::PrimitiveArray, layout2::PrimitiveArray)
layout1.data == layout2.data
end

function push!(layout::PrimitiveArray{T}, x::T) where {T}
"""
    push!(layout::PrimitiveArray{ITEM}, x::ITEM)

Append `x` to the buffer of `layout` with `Base.push!` and return `layout`, so
calls can be chained.
"""
function push!(layout::PrimitiveArray{ITEM}, x::ITEM) where {ITEM}
    buffer = layout.data
    Base.push!(buffer, x)
    return layout
end

### ListOffsetArray ######################################################

struct ListOffsetArray{INDEX<:IndexBig,CONTENT<:Content} <: Content
# Layout made of an `offsets` index, a nested `content` layout and `parameters`.
# The `BEHAVIOR` type parameter is taken from the `behavior` keyword of the
# constructor; it is not stored as a field.
struct ListOffsetArray{INDEX<:IndexBig,CONTENT<:Content,BEHAVIOR} <: Content{BEHAVIOR}
offsets::INDEX
content::CONTENT
parameters::Parameters
# Inner constructor: INDEX and CONTENT are inferred from the arguments;
# `parameters` defaults to an empty `Parameters()` and `behavior` to `:default`.
ListOffsetArray(
offsets::INDEX,
content::CONTENT;
parameters::Parameters = Parameters(),
behavior::Symbol = :default,
) where {INDEX<:IndexBig,CONTENT<:Content} =
new{INDEX,CONTENT,behavior}(offsets, content, parameters)
end

ListOffsetArray{INDEX,CONTENT}() where {INDEX<:IndexBig} where {CONTENT<:Content} =
AwkwardArray.ListOffsetArray(INDEX([0]), CONTENT())
# Create an empty list layout: the offsets are `INDEX([0])` and the content is a
# default-constructed `CONTENT()`. `parameters` and `behavior` are forwarded
# unchanged to the main constructor.
function ListOffsetArray{INDEX,CONTENT}(;
    parameters::Parameters = Parameters(),
    behavior::Symbol = :default,
) where {CONTENT<:Content,INDEX<:IndexBig}
    initial_offsets = INDEX([0])
    empty_content = CONTENT()
    return AwkwardArray.ListOffsetArray(
        initial_offsets,
        empty_content;
        parameters = parameters,
        behavior = behavior,
    )
end

"""
    copy(layout::ListOffsetArray; offsets, content, parameters, behavior)

Build a new `ListOffsetArray` from `layout`, replacing only the pieces passed as
keywords. A keyword left at its default `Unset()` keeps the corresponding value of
`layout`; offsets and content are passed along as is, not duplicated.

# Keywords
- `offsets`: replacement offsets (must have the same type as `layout.offsets`).
- `content`: replacement content (must have the same type as `layout.content`).
- `parameters`: replacement `Parameters`.
- `behavior`: replacement behavior `Symbol`.
"""
function copy(
    layout::ListOffsetArray{INDEX,CONTENT,BEHAVIOR};
    offsets::Union{Unset,INDEX} = Unset(),
    content::Union{Unset,CONTENT} = Unset(),
    parameters::Union{Unset,Parameters} = Unset(),
    behavior::Union{Unset,Symbol} = Unset(),
) where {INDEX<:IndexBig,CONTENT<:Content,BEHAVIOR}
    return ListOffsetArray(
        offsets isa Unset ? layout.offsets : offsets,
        content isa Unset ? layout.content : content,
        parameters = parameters isa Unset ? layout.parameters : parameters,
        # BEHAVIOR is the static type parameter bound by the signature, so there is
        # no need to reflect on `typeof(layout).parameters`.
        behavior = behavior isa Unset ? BEHAVIOR : behavior,
    )
end

function is_valid(layout::ListOffsetArray)
if length(layout.offsets) < 1
Expand Down Expand Up @@ -121,4 +263,27 @@ function end_list!(layout::ListOffsetArray)
layout
end

### ListOffsetArray with behavior = :string ##############################

# Indexing a `:string` list whose content is a `:char` buffer of `UInt8` returns
# a `String`.
#
# The offsets and raw bytes are re-wrapped in layouts with the default behavior,
# so the inner `getindex` call does not match this method's signature again; the
# `data` of the item it returns is then turned into a `String`.
function Base.getindex(
    layout::ListOffsetArray{INDEX,PrimitiveArray{UInt8,BUFFER,:char},:string},
    i::Int,
) where {INDEX<:IndexBig,BUFFER<:AbstractVector{UInt8}}
    plain_content = PrimitiveArray(layout.content.data)
    plain_list = ListOffsetArray(layout.offsets, plain_content)
    item_bytes = getindex(plain_list, i).data
    return String(item_bytes)
end

### ListOffsetArray with behavior = :bytestring ##########################

# Indexing a `:bytestring` list whose content is a `:byte` buffer of `UInt8`
# returns the item's raw byte buffer.
#
# The offsets and raw bytes are re-wrapped in layouts with the default behavior,
# so the inner `getindex` call does not match this method's signature again; the
# `data` field of the item it returns is the result.
function Base.getindex(
    layout::ListOffsetArray{INDEX,PrimitiveArray{UInt8,BUFFER,:byte},:bytestring},
    i::Int,
) where {INDEX<:IndexBig,BUFFER<:AbstractVector{UInt8}}
    plain_content = PrimitiveArray(layout.content.data)
    plain_list = ListOffsetArray(layout.offsets, plain_content)
    item = getindex(plain_list, i)
    return item.data
end

end
112 changes: 112 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ using AwkwardArray
using Test

@testset "AwkwardArray.jl" begin
### PrimitiveArray #######################################################

begin
layout = AwkwardArray.PrimitiveArray([1.1, 2.2, 3.3, 4.4, 5.5])
@test AwkwardArray.is_valid(layout)
Expand Down Expand Up @@ -34,6 +36,8 @@ using Test
@test layout == AwkwardArray.PrimitiveArray([1.1, 2.2, 3.3, 4.4, 5.5])
end

### ListOffsetArray ######################################################

begin
layout = AwkwardArray.ListOffsetArray(
[0, 3, 3, 5],
Expand Down Expand Up @@ -104,4 +108,112 @@ using Test
)
end

### ListOffsetArray with behavior = :string ##############################

# Six UTF-8 strings stored back to back in one 18-byte buffer. The offsets
# [0, 3, 8, 9, 11, 14, 18] delimit them, so the items are 3, 5, 1, 2, 3 and 4
# bytes long (the last three are multi-byte characters).
begin
layout = AwkwardArray.ListOffsetArray(
[0, 3, 8, 9, 11, 14, 18],
AwkwardArray.PrimitiveArray(
[
0x68,
0x65,
0x79,
0x74,
0x68,
0x65,
0x72,
0x65,
0x24,
0xc2,
0xa2,
0xe2,
0x82,
0xac,
0xf0,
0x9f,
0x92,
0xb0,
],
behavior = :char,
),
behavior = :string,
)

# With behavior = :string on the list and :char on its content, indexing an
# item yields a String.
@test layout[1] == "hey"
@test layout[2] == "there"
@test layout[3] == "\$"
@test layout[4] == "¢"
@test layout[5] == "€"
@test layout[6] == "💰"

# Converting the whole layout to a Vector yields the same strings in order.
@test Vector(layout) == ["hey", "there", "\$", "¢", "€", "💰"]
end

### ListOffsetArray with behavior = :bytestring ##########################

# Same 18-byte buffer and offsets as the :string test, but tagged :byte /
# :bytestring: indexing an item is expected to give its raw bytes.
begin
layout = AwkwardArray.ListOffsetArray(
[0, 3, 8, 9, 11, 14, 18],
AwkwardArray.PrimitiveArray(
[
0x68,
0x65,
0x79,
0x74,
0x68,
0x65,
0x72,
0x65,
0x24,
0xc2,
0xa2,
0xe2,
0x82,
0xac,
0xf0,
0x9f,
0x92,
0xb0,
],
behavior = :byte,
),
behavior = :bytestring,
)

# Each item compares equal to the byte slice delimited by consecutive offsets.
@test layout[1] == [0x68, 0x65, 0x79]
@test layout[2] == [0x74, 0x68, 0x65, 0x72, 0x65]
@test layout[3] == [0x24]
@test layout[4] == [0xc2, 0xa2]
@test layout[5] == [0xe2, 0x82, 0xac]
@test layout[6] == [0xf0, 0x9f, 0x92, 0xb0]
end

### ListOffsetArray with other parameters ################################

# A list built without a `behavior` keyword can still carry user parameters.
begin
layout = AwkwardArray.ListOffsetArray(
[0, 3, 3, 8],
AwkwardArray.PrimitiveArray([0x68, 0x65, 0x79, 0x74, 0x68, 0x65, 0x72, 0x65],),
parameters = AwkwardArray.Parameters("__doc__" => "nice list"),
)

# The stored parameter is readable through the layout; a key that was never
# set is reported as absent.
@test AwkwardArray.get_parameter(layout, "__doc__") == "nice list"
@test !AwkwardArray.has_parameter(layout, "__list__")
end

# Parameters and a non-default behavior can be combined on the same layout.
begin
layout = AwkwardArray.ListOffsetArray(
[0, 3, 3, 8],
AwkwardArray.PrimitiveArray(
[0x68, 0x65, 0x79, 0x74, 0x68, 0x65, 0x72, 0x65],
behavior = :char,
),
parameters = AwkwardArray.Parameters("__doc__" => "nice string"),
behavior = :string,
)

# The behavior is not exposed as a "__list__" parameter: only the explicitly
# supplied "__doc__" key is present.
@test AwkwardArray.get_parameter(layout, "__doc__") == "nice string"
@test !AwkwardArray.has_parameter(layout, "__list__")
end

end