Skip to content

Commit

Permalink
Track chosen cachefile and load the module path (#37421)
Browse files Browse the repository at this point in the history
These changes allow Revise to leverage precompiled Base machinery,
dropping about 150ms from Revise's extra latency penalty for the first
package load.
  • Loading branch information
timholy authored Sep 13, 2020
1 parent 72854dc commit 667ab89
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 14 deletions.
73 changes: 61 additions & 12 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -824,9 +824,19 @@ function require(into::Module, mod::Symbol)
return require(uuidkey, cache)
end

struct PkgOrigin
# version::VersionNumber
# path::String
cachepath::Union{String,Nothing}
end
const pkgorigins = Dict{PkgId,PkgOrigin}()

function require(uuidkey::PkgId, cache::TOMLCache=TOMLCache())
if !root_module_exists(uuidkey)
_require(uuidkey, cache)
cachefile = _require(uuidkey, cache)
if cachefile !== nothing
pkgorigins[uuidkey] = PkgOrigin(cachefile)
end
# After successfully loading, notify downstream consumers
for callback in package_callbacks
invokelatest(callback, uuidkey)
Expand Down Expand Up @@ -881,6 +891,7 @@ function unreference_module(key::PkgId)
end
end

# Returns `nothing` or the name of the newly-created cachefile
function _require(pkg::PkgId, cache::TOMLCache)
# handle recursive calls to require
loading = get(package_locks, pkg, false)
Expand Down Expand Up @@ -942,7 +953,7 @@ function _require(pkg::PkgId, cache::TOMLCache)
if isa(m, Exception)
@warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m
else
return
return cachefile
end
end
end
Expand Down Expand Up @@ -1257,6 +1268,13 @@ module_build_id(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), m)
isvalid_cache_header(f::IOStream) = (0 != ccall(:jl_read_verify_header, Cint, (Ptr{Cvoid},), f.ios))
isvalid_file_crc(f::IOStream) = (_crc32c(seekstart(f), filesize(f) - 4) == read(f, UInt32))

struct CacheHeaderIncludes
id::PkgId
filename::String
mtime::Float64
modpath::Vector{String} # seemingly not needed in Base, but used by Revise
end

function parse_cache_header(f::IO)
modules = Vector{Pair{PkgId, UInt64}}()
while true
Expand All @@ -1270,7 +1288,7 @@ function parse_cache_header(f::IO)
totbytes = read(f, Int64) # total bytes for file dependencies
# read the list of requirements
# and split the list into include and requires statements
includes = Tuple{PkgId, String, Float64}[]
includes = CacheHeaderIncludes[]
requires = Pair{PkgId, PkgId}[]
while true
n2 = read(f, Int32)
Expand All @@ -1280,20 +1298,21 @@ function parse_cache_header(f::IO)
n1 = read(f, Int32)
# map ids to keys
modkey = (n1 == 0) ? PkgId("") : modules[n1].first
modpath = String[]
if n1 != 0
# consume (and ignore) the module path too
# determine the complete module path
while true
n1 = read(f, Int32)
totbytes -= 4
n1 == 0 && break
skip(f, n1) # String(read(f, n1))
push!(modpath, String(read(f, n1)))
totbytes -= n1
end
end
if depname[1] == '\0'
push!(requires, modkey => binunpack(depname))
else
push!(includes, (modkey, depname, mtime))
push!(includes, CacheHeaderIncludes(modkey, depname, mtime, modpath))
end
totbytes -= 4 + 4 + n2 + 8
end
Expand All @@ -1312,19 +1331,28 @@ function parse_cache_header(f::IO)
return modules, (includes, requires), required_modules, srctextpos
end

function parse_cache_header(cachefile::String)
function parse_cache_header(cachefile::String; srcfiles_only::Bool=false)
io = open(cachefile, "r")
try
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
return parse_cache_header(io)
ret = parse_cache_header(io)
srcfiles_only || return ret
modules, (includes, requires), required_modules, srctextpos = ret
srcfiles = srctext_files(io, srctextpos)
delidx = Int[]
for (i, chi) in enumerate(includes)
chi.filename srcfiles || push!(delidx, i)
end
deleteat!(includes, delidx)
return modules, (includes, requires), required_modules, srctextpos
finally
close(io)
end
end

function cache_dependencies(f::IO)
defs, (includes, requires), modules = parse_cache_header(f)
return modules, map(mod_fl_mt -> (mod_fl_mt[2], mod_fl_mt[3]), includes) # discard the module
return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime
end

function cache_dependencies(cachefile::String)
Expand Down Expand Up @@ -1368,6 +1396,21 @@ function read_dependency_src(cachefile::String, filename::AbstractString)
end
end

function srctext_files(f::IO, srctextpos::Int64)
files = Set{String}()
srctextpos == 0 && return files
seek(f, srctextpos)
while !eof(f)
filenamelen = read(f, Int32)
filenamelen == 0 && break
fn = String(read(f, filenamelen))
len = read(f, UInt64)
push!(files, fn)
seek(f, position(f) + len)
end
return files
end

# returns true if it "cachefile.ji" is stale relative to "modpath.jl"
# otherwise returns the list of dependencies to also check
stale_cachefile(modpath::String, cachefile::String) = stale_cachefile(modpath, cachefile, TOMLCache())
Expand All @@ -1379,6 +1422,7 @@ function stale_cachefile(modpath::String, cachefile::String, cache::TOMLCache)
return true # invalid cache file
end
(modules, (includes, requires), required_modules) = parse_cache_header(io)
id = isempty(modules) ? nothing : first(modules).first
modules = Dict{PkgId, UInt64}(modules)

# Check if transitive dependencies can be fulfilled
Expand Down Expand Up @@ -1423,8 +1467,8 @@ function stale_cachefile(modpath::String, cachefile::String, cache::TOMLCache)

# now check if this file is fresh relative to its source files
if !skip_timecheck
if !samefile(includes[1][2], modpath)
@debug "Rejecting cache file $cachefile because it is for file $(includes[1][2])) not file $modpath"
if !samefile(includes[1].filename, modpath)
@debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename)) not file $modpath"
return true # cache file was compiled from a different path
end
for (modkey, req_modkey) in requires
Expand All @@ -1434,7 +1478,8 @@ function stale_cachefile(modpath::String, cachefile::String, cache::TOMLCache)
return true
end
end
for (_, f, ftime_req) in includes
for chi in includes
f, ftime_req = chi.filename, chi.mtime
# Issue #13606: compensate for Docker images rounding mtimes
# Issue #20837: compensate for GlusterFS truncating mtimes to microseconds
ftime = mtime(f)
Expand All @@ -1450,6 +1495,10 @@ function stale_cachefile(modpath::String, cachefile::String, cache::TOMLCache)
return true
end

if isa(id, PkgId)
pkgorigins[id] = PkgOrigin(cachefile)
end

return depmods # fresh cachefile
finally
close(io)
Expand Down
2 changes: 2 additions & 0 deletions contrib/generate_precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ hardcoded_precompile_statements = """
@assert precompile(Tuple{typeof(push!), Set{Module}, Module})
@assert precompile(Tuple{typeof(push!), Set{Method}, Method})
@assert precompile(Tuple{typeof(push!), Set{Base.PkgId}, Base.PkgId})
@assert precompile(Tuple{typeof(getindex), Dict{Base.PkgId,String}, Base.PkgId})
@assert precompile(Tuple{typeof(setindex!), Dict{String,Base.PkgId}, Base.PkgId, String})
@assert precompile(Tuple{typeof(get!), Type{Vector{Function}}, Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
@assert precompile(Tuple{typeof(isassigned), Core.SimpleVector, Int})
@assert precompile(Tuple{typeof(pushfirst!), Vector{Any}, Function})
@assert precompile(Tuple{typeof(Base.parse_cache_header), String})
"""

precompile_script = """
Expand Down
55 changes: 53 additions & 2 deletions test/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,9 @@ try
@test string(Base.Docs.doc(Foo.Bar.bar)) == "bar function\n"

modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile)
discard_module = mod_fl_mt -> (mod_fl_mt[2], mod_fl_mt[3])
discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime)
@test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) ]
@test map(x -> x[2], deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ]
@test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ]
@test requires == [ Base.PkgId(Foo) => Base.PkgId(string(FooBase_module)),
Base.PkgId(Foo) => Base.PkgId(Foo2),
Base.PkgId(Foo) => Base.PkgId(Test),
Expand Down Expand Up @@ -299,6 +299,8 @@ try
end
)
@test discard_module.(deps) == deps1
modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile; srcfiles_only=true)
@test map(x -> x.filename, deps) == [Foo_file]

@test current_task()(0x01, 0x4000, 0x30031234) == 2
@test sin(0x01, 0x4000, 0x30031234) == 52
Expand Down Expand Up @@ -339,6 +341,55 @@ try
@test pointer_from_objref(PV) === pointer_from_objref(ft(ft(PV)[1].parameters[1])[1])
end

Nest_module = :Nest4b3a94a1a081a8cb
Nest_file = joinpath(dir, "$Nest_module.jl")
NestInner_file = joinpath(dir, "$(Nest_module)Inner.jl")
NestInner2_file = joinpath(dir, "$(Nest_module)Inner2.jl")
write(Nest_file,
"""
module $Nest_module
include("$(escape_string(NestInner_file))")
end
""")
write(NestInner_file,
"""
module NestInner
include("$(escape_string(NestInner2_file))")
end
""")
write(NestInner2_file,
"""
f() = 22
""")
Nest = Base.require(Main, Nest_module)
cachefile = joinpath(cachedir, "$Nest_module.ji")
modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile)
@test last(deps).modpath == ["NestInner"]

UsesB_module = :UsesB4b3a94a1a081a8cb
B_module = :UsesB4b3a94a1a081a8cb_B
UsesB_file = joinpath(dir, "$UsesB_module.jl")
B_file = joinpath(dir, "$(B_module).jl")
write(UsesB_file,
"""
module $UsesB_module
using $B_module
end
""")
write(B_file,
"""
module $B_module
export bfunc
bfunc() = 33
end
""")
UsesB = Base.require(Main, UsesB_module)
cachefile = joinpath(cachedir, "$UsesB_module.ji")
modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile)
id1, id2 = only(requires)
@test Base.pkgorigins[id1].cachepath == cachefile
@test Base.pkgorigins[id2].cachepath == joinpath(cachedir, "$B_module.ji")

Baz_file = joinpath(dir, "Baz.jl")
write(Baz_file,
"""
Expand Down

2 comments on commit 667ab89

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @ararslan

Please sign in to comment.