Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce a bitstype just for IANA Variable TimeZones #335

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/TimeZones.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ function __init__()
)

global ISOZonedDateTimeFormat = DateFormat("yyyy-mm-ddTHH:MM:SS.ssszzz")
init_IANA_NAMES!()
end

include("compat.jl")
Expand All @@ -58,6 +59,7 @@ include("utcoffset.jl")
include(joinpath("types", "timezone.jl"))
include(joinpath("types", "fixedtimezone.jl"))
include(joinpath("types", "variabletimezone.jl"))
include(joinpath("types", "ianatimezone.jl"))
include(joinpath("types", "zoneddatetime.jl"))
include("exceptions.jl")
include(joinpath("tzdata", "TZData.jl"))
Expand Down
4 changes: 2 additions & 2 deletions src/arithmetic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ function broadcasted(::typeof(+), r::StepRange{ZonedDateTime}, p::DatePeriod)
# non-existent and ambiguous dates.

tz = timezone(start)
if isa(tz, VariableTimeZone)
if isa(tz, AbstractVariableTimeZone)
start = first_valid(DateTime(start) + p, tz, step)
else
start = start + p
end

tz = timezone(stop)
if isa(tz, VariableTimeZone)
if isa(tz, AbstractVariableTimeZone)
stop = last_valid(DateTime(stop) + p, tz, step)
else
stop = stop + p
Expand Down
2 changes: 2 additions & 0 deletions src/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ Converts a `ZonedDateTime` from its current `TimeZone` into the specified `TimeZ
"""
function astimezone end

# Convert using the backing time zone, then re-tag the result with the compact `tz`.
function astimezone(zdt::ZonedDateTime, tz::IANATimeZone)
    return _do_and_rewrap(astimezone, zdt, tz)
end

function astimezone(zdt::ZonedDateTime, tz::VariableTimeZone)
i = searchsortedlast(
tz.transitions, zdt.utc_datetime,
Expand Down
11 changes: 6 additions & 5 deletions src/discovery.jl
Original file line number Diff line number Diff line change
Expand Up @@ -154,24 +154,25 @@ next_transition_instant

function next_transition_instant(zdt::ZonedDateTime)
tz = zdt.timezone
tz isa VariableTimeZone || return nothing
tz isa AbstractVariableTimeZone || return nothing

transits = transitions(tz)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be premature optimization to pull this lookup out and do it once up front.
The idea is to avoid looking up the backing_timezone for an IANATimeZone multiple times.

# Determine the index of the transition which occurs after the UTC datetime specified
index = searchsortedfirst(
tz.transitions, DateTime(zdt, UTC),
transits, DateTime(zdt, UTC),
by=el -> isa(el, TimeZones.Transition) ? el.utc_datetime : el,
)

index <= length(tz.transitions) || return nothing
index <= length(transits) || return nothing

# Use the UTC datetime of the transition and the offset information prior to the
# transition to create a `ZonedDateTime` which cannot be constructed with the high-level
# constructors. The instant constructed is equivalent to the first instant after the
# transition but visually appears to be before the transition. For example in a
# transition where the clock changes from 01:59 → 03:00 we would return 02:00 where
# the UTC datetime of 02:00 == 03:00.
utc_datetime = tz.transitions[index].utc_datetime
prev_zone = tz.transitions[index - 1].zone
utc_datetime = transits[index].utc_datetime
prev_zone = transits[index - 1].zone
ZonedDateTime(utc_datetime, tz, prev_zone)
end

Expand Down
4 changes: 4 additions & 0 deletions src/interpret.jl
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,7 @@ function last_valid(local_dt::DateTime, tz::VariableTimeZone)
possible = interpret(local_dt, tz, Local)
return isempty(possible) ? first(shift_gap(local_dt, tz)) : last(possible)
end


# Delegate to `first_valid` on the backing time zone, then re-tag the result with `tz`.
function first_valid(dt::DateTime, tz::IANATimeZone, args...)
    return _do_and_rewrap(first_valid, dt, tz, args...)
end
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably _do_and_rewrap needs to be renamed

# Delegate to `last_valid` on the backing time zone, then re-tag the result with `tz`.
function last_valid(dt::DateTime, tz::IANATimeZone, args...)
    return _do_and_rewrap(last_valid, dt, tz, args...)
end
136 changes: 136 additions & 0 deletions src/types/ianatimezone.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@

# Number of slots in each lookup table. The tables are deliberately sparse: this is
# extremely redundant but still only about 8MB for each of our 2 tables, and it avoids
# any need to write a smarter but more CPU time expensive perfect hash.
# The size needs to be big enough to avoid any collisions once a hash is reduced by
# `iana_mod_id`; the extra size is also useful because it means we are probably safe if
# new timezones are added.
const IANA_TABLE_SIZE = 2^20

# Loaded `VariableTimeZone`s, stored at the slot given by `iana_mod_id` of their name.
# A slot stays unassigned until `get_iana_timezone!` loads the corresponding zone.
const IANA_TIMEZONES = Vector{VariableTimeZone}(undef, IANA_TABLE_SIZE)

# TODO: maybe fill this during build(), probably by generating a julia file.
Copy link
Contributor Author

@oxinabox oxinabox Apr 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe it is fine, since it only queries which files exist (using timezone_names) rather than reading them.
Maybe timezone_names (or __init__) should validate that TimeZones.jl has been built properly and that TZData.COMPILED_DIR exists.
That would maybe clean up the TimeZone constructor a bit.

# That way we can avoid actually instantiating every timezone until it is needed.
oxinabox marked this conversation as resolved.
Show resolved Hide resolved
# Time zone names, stored at the slot given by `iana_mod_id` of the name.
# Filled by `init_IANA_NAMES!`; slots without a registered name stay unassigned.
const IANA_NAMES = Vector{String}(undef, IANA_TABLE_SIZE)
"""
    init_IANA_NAMES!()

Fill `IANA_NAMES` with every name returned by `timezone_names()`, storing each name at
the slot computed by `iana_mod_id`. This is run by `__init__` (at least for now).

Throws an error if two different names map to the same slot, because lookups rely on
the hash being perfect even after reduction modulo the table size. Registering a name
that already occupies its own slot is a no-op, so the function can safely run again.
Returns `IANA_NAMES`.
"""
function init_IANA_NAMES!()
    for name in timezone_names()
        # TODO: we should work out how to filter out FixedTimeZones here
        mod_id = iana_mod_id(name)
        # Important: make sure our hash is perfect (even modulo the table size).
        # Only a *different* name in the slot is a collision.
        if isassigned(IANA_NAMES, mod_id) && IANA_NAMES[mod_id] != name
            error("hash collision for $name, at $mod_id")
        end
        IANA_NAMES[mod_id] = name
    end
    return IANA_NAMES
end

# have checked that this is perfect
# Hash a `VariableTimeZone` through its name, so it agrees with `perfect_hash` of the name.
function perfect_hash(tz::VariableTimeZone, h=zero(UInt))
    return perfect_hash(tz.name, h)
end
# Hash a time zone name: the `:timezone` tag is mixed into the seed `h` first, and the
# name is then hashed on top of that. Returns the full (unreduced) hash.
function perfect_hash(name::AbstractString, h=zero(UInt))
    h = hash(:timezone, h)
    h = hash(name, h)
    return h
end

# Map a name or `VariableTimeZone` to its table slot by way of its `perfect_hash`.
function iana_mod_id(str_or_var_tz)
    return iana_mod_id(perfect_hash(str_or_var_tz))
end

# Reduce a full hash to a 1-based slot index using `mod1` with `IANA_TABLE_SIZE`.
function iana_mod_id(id::UInt)
    return mod1(id, IANA_TABLE_SIZE)
end

# Report whether `str` is registered in `IANA_NAMES`: its slot must be assigned and must
# hold exactly this name.
function is_standard_iana(str::AbstractString)
    slot = iana_mod_id(str)
    isassigned(IANA_NAMES, slot) || return false
    return IANA_NAMES[slot] == str
end

"""
    get_iana_timezone!(str::AbstractString)

Look up the time zone named `str`, loading it from `TZData.COMPILED_DIR` when its slot
in `IANA_TIMEZONES` is still unassigned.

If the slot is already assigned, the stored `VariableTimeZone` is returned. Otherwise
the zone is deserialized: a `VariableTimeZone` is stored in `IANA_TIMEZONES` and an
`IANATimeZone` for `str` is returned, while any other result (a `FixedTimeZone`) is
returned as-is and is not stored.
"""
function get_iana_timezone!(str::AbstractString)
    mod_id = iana_mod_id(str)
    # NOTE(review): a cache hit returns the backing `VariableTimeZone`, whereas the first
    # load below returns an `IANATimeZone` -- confirm that this asymmetry is intended.
    isassigned(IANA_TIMEZONES, mod_id) && return IANA_TIMEZONES[mod_id]

    tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...)
    tz, _ = deserialize(tz_path)  # the deserialized class is not needed here
    # TODO: maybe here is where we check if it is a FixedTimeZone, and if so don't
    # remember it?
    if tz isa VariableTimeZone
        IANA_TIMEZONES[mod_id] = tz
        return IANATimeZone(perfect_hash(str))
    end
    # it is a FixedTimeZone, we are not going to use a IANATimeZone
    return tz
end

# Resolve a full (unreduced) hash `id` to the name registered in its slot and look that
# name up. Errors when no name is registered in the slot for `id`.
function get_iana_timezone!(id::UInt)
    mod_id = iana_mod_id(id)
    isassigned(IANA_NAMES, mod_id) || error(
        "$id does not correspond to any known IANA timezone. " *
        "Check you are using the right version of the IANA database.",
    )
    return get_iana_timezone!(IANA_NAMES[mod_id])
end


"""
    IANATimeZone(::AbstractString) <: AbstractVariableTimeZone

A type for representing a standard variable IANA time zone from the tzdata.
Under-the-hood it stores only a unique integer identifier.
"""
struct IANATimeZone <: AbstractVariableTimeZone
    # `id` must be the hash of the corresponding Variable/FixedTimeZone, which is only
    # possible if `hash` on all timezones in tzdata happens to be perfect.
    # This is the real hash, not the hash modulo IANA_TABLE_SIZE, because that way we
    # can in the future change IANA_TABLE_SIZE and not invalidate old serialized data.
    id::UInt
end

# Build an `IANATimeZone` from a time zone name by storing the name's `perfect_hash`.
IANATimeZone(name::AbstractString) = IANATimeZone(perfect_hash(name))

# Resolve the stored `id` through `get_iana_timezone!` to obtain the backing time zone.
function backing_timezone(itz::IANATimeZone)
    return get_iana_timezone!(itz.id)
end

# Two `IANATimeZone`s are equal when their ids match. Against any other `TimeZone`, in
# either argument order, the comparison is made on the backing time zone.
function Base.:(==)(a::IANATimeZone, b::IANATimeZone)
    return a.id == b.id
end

function Base.:(==)(a::IANATimeZone, b::TimeZone)
    return backing_timezone(a) == b
end

function Base.:(==)(b::TimeZone, a::IANATimeZone)
    return backing_timezone(a) == b
end

# TODO: we have the hash, it seems like we should be able to use that to get seeded hash
oxinabox marked this conversation as resolved.
Show resolved Hide resolved
# Hash an `IANATimeZone` exactly as its backing time zone hashes with the same seed.
function Base.hash(a::IANATimeZone, seed::UInt)
    return hash(backing_timezone(a), seed)
end

# Forward `name` and `transitions` to the backing time zone.
function name(a::IANATimeZone)
    return name(backing_timezone(a))
end

function transitions(tz::IANATimeZone)
    return transitions(backing_timezone(tz))
end

# TODO: should i just make this check the fields of VariableTimeZone and just delegate all?
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be addressed

# Expose `name` and `transitions` as virtual properties computed by the accessor
# functions; every other symbol is read as a real field of the struct.
function Base.getproperty(tz::IANATimeZone, s::Symbol)
    s === :name && return name(tz)
    s === :transitions && return transitions(tz)
    return getfield(tz, s)
end
# A property exists if it is one of the virtual ones or a real field of `IANATimeZone`.
function Base.hasproperty(tz::IANATimeZone, s::Symbol)
    is_virtual = s === :name || s === :transitions
    return is_virtual || hasfield(IANATimeZone, s)
end
oxinabox marked this conversation as resolved.
Show resolved Hide resolved




"""
    _do_and_rewrap(f, arg1, tz::IANATimeZone, args...; kwargs...)

Run the function `f(arg1, backing_timezone(tz), args...; kwargs...)`
which must return a `ZonedDateTime`, with the backing timezone.
Replace the timezone field with `tz` (which should be equivalent).
"""
function _do_and_rewrap(f, arg1, tz::IANATimeZone, args...; kwargs...)
    # Look the backing time zone up once and reuse it for the call.
    backed_tz = backing_timezone(tz)
    backed_zdt::ZonedDateTime = f(arg1, backed_tz, args...; kwargs...)
    # make it store tz rather than the equiv backing timezone, other fields the same
    return ZonedDateTime(backed_zdt.utc_datetime, tz, backed_zdt.zone)
end


# Display an `IANATimeZone` exactly as its backing time zone is displayed.
function Base.show(io::IO, tz::IANATimeZone)
    return show(io, backing_timezone(tz))
end

function Base.print(io::IO, tz::IANATimeZone)
    return print(io, backing_timezone(tz))
end
5 changes: 4 additions & 1 deletion src/types/timezone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT))
tz, class = get!(TIME_ZONE_CACHE, str) do
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@omus input on the best way to rewrite this function would be appreciated.

tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...)

if isfile(tz_path)
if mask == Class(:DEFAULT) && is_standard_iana(str)
# TODO: not sure whether this is a sensible way to handle class and mask
get_iana_timezone!(str), Class(:DEFAULT)
elseif isfile(tz_path)
open(deserialize, tz_path, "r")
elseif occursin(FIXED_TIME_ZONE_REGEX, str)
FixedTimeZone(str), Class(:FIXED)
Expand Down
25 changes: 19 additions & 6 deletions src/types/variabletimezone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,27 @@ end

Base.isless(a::Transition, b::Transition) = isless(a.utc_datetime, b.utc_datetime)

# Supertype for time zones whose offset changes over time.
#
# TODO: define and document an actual API for this
# Seems to need: (list may be incomplete)
# - transitions(tz)
# - name(tz)
# - first_valid(dt, tz, ...)
# - last_valid(dt, tz, ...)
# - some constructors for ZonedDateTime
#
# As well as ones that are common to TimeZone
# - astimezone(zdt, tz)
# - show(io, tz)
# - print(io, tz)
# - `==` and `hash`
abstract type AbstractVariableTimeZone <: TimeZone end

"""
VariableTimeZone

A `TimeZone` with an offset that changes over time.
"""
struct VariableTimeZone <: TimeZone
struct VariableTimeZone <: AbstractVariableTimeZone
name::String
transitions::Vector{Transition}
cutoff::Union{DateTime,Nothing}
Expand All @@ -20,6 +35,8 @@ struct VariableTimeZone <: TimeZone
end
end

# Accessors for the stored `transitions` and `name` fields of a `VariableTimeZone`.
function transitions(tz::VariableTimeZone)
    return tz.transitions
end

function name(tz::VariableTimeZone)
    return tz.name
end

function rename(tz::VariableTimeZone, name::AbstractString)
Expand All @@ -38,8 +55,4 @@ function Base.isequal(a::VariableTimeZone, b::VariableTimeZone)
)
end

function Base.hash(tz::VariableTimeZone, h::UInt)
h = hash(:timezone, h)
h = hash(tz.name, h)
return h
end
Base.hash(tz::VariableTimeZone, h::UInt) = perfect_hash(tz, h)
10 changes: 9 additions & 1 deletion src/types/zoneddatetime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ function ZonedDateTime(dt::DateTime, tz::VariableTimeZone, is_dst::Bool)
end
end

# Construct in the backing time zone using `occ_or_dst`, then re-tag the result with `tz`.
ZonedDateTime(dt::DateTime, tz::IANATimeZone, occ_or_dst::Integer) =
    _do_and_rewrap(ZonedDateTime, dt, tz, occ_or_dst)

# Construct in the backing time zone, forwarding all keyword arguments, then re-tag the
# result with `tz`.
ZonedDateTime(dt::DateTime, tz::IANATimeZone; kwargs...) =
    _do_and_rewrap(ZonedDateTime, dt, tz; kwargs...)

# Convenience constructors
@doc """
ZonedDateTime(y, [m, d, h, mi, s, ms], tz, [amb]) -> DateTime
Expand All @@ -113,7 +121,7 @@ Construct a `ZonedDateTime` type by parts. Arguments `y, m, ..., ms` must be con
`TimeZone` then `amb` can be supplied to resolve ambiguity.
""" ZonedDateTime

@optional function ZonedDateTime(y::Integer, m::Integer=1, d::Integer=1, h::Integer=0, mi::Integer=0, s::Integer=0, ms::Integer=0, tz::VariableTimeZone, amb::Union{Integer,Bool})
@optional function ZonedDateTime(y::Integer, m::Integer=1, d::Integer=1, h::Integer=0, mi::Integer=0, s::Integer=0, ms::Integer=0, tz::AbstractVariableTimeZone, amb::Union{Integer,Bool})
ZonedDateTime(DateTime(y,m,d,h,mi,s,ms), tz, amb)
end

Expand Down