Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't log/comment errors, require an admin to look at the logs. #114

Merged
merged 3 commits into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 34 additions & 84 deletions src/jobs/BenchmarkJob.jl
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,8 @@ function retrieve_daily_data!(cfg, date)
return results, repo, commit
end
catch err
nodelog(cfg, myid(),
"encountered error when retrieving daily data: " * sprint(showerror, err),
error=(err, stacktrace(catch_backtrace())))
nodelog(cfg, myid(), "encountered error when retrieving daily data";
error=(err, catch_backtrace()))
finally
isdir(datapath) && rm(datapath, recursive=true)
end
Expand Down Expand Up @@ -223,19 +222,14 @@ function Base.run(job::BenchmarkJob)
ex isa TaskFailedException || rethrow()
end

# run primary job
try
# run primary job
julia_primary = fetch(julia_primary)
nodelog(cfg, node, "running primary build for $(summary(job))")
results["primary"] = execute_benchmarks!(job, julia_primary, :primary)
nodelog(cfg, node, "finished primary build for $(summary(job))")
catch err
results["error"] = NanosoldierError("failed to run benchmarks against primary commit", err)
results["backtrace"] = catch_backtrace()
end

# as long as our primary job didn't error, run the comparison job (or if it's a daily job, gather results to compare against)
if !haskey(results, "error")
# run the comparison job (or if it's a daily job, gather results to compare against)
if job.isdaily # get results from previous day (if it exists, check the past 120 days)
try
nodelog(cfg, node, "retrieving results from previous daily build")
Expand All @@ -258,26 +252,21 @@ function Base.run(job::BenchmarkJob)
rethrow(NanosoldierError("encountered error when retrieving old daily build data", err))
end
elseif job.against !== nothing # run comparison build
try
julia_against = fetch(julia_against)
nodelog(cfg, node, "running comparison build for $(summary(job))")
results["against"] = execute_benchmarks!(job, julia_against, :against)
nodelog(cfg, node, "finished comparison build for $(summary(job))")
catch err
results["error"] = NanosoldierError("failed to run benchmarks against comparison commit", err)
results["backtrace"] = catch_backtrace()
end
julia_against = fetch(julia_against)
nodelog(cfg, node, "running comparison build for $(summary(job))")
results["against"] = execute_benchmarks!(job, julia_against, :against)
nodelog(cfg, node, "finished comparison build for $(summary(job))")
end
if haskey(results, "against")
results["judged"] = BenchmarkTools.judge(minimum(results["primary"]), minimum(results["against"]))
end
end

for dir in cleanup
if ispath(dir)
run(`sudo -n -u $(cfg.user) -- chmod -R ug+rwX $dir/julia`) # make it rwx
Base.Filesystem.prepare_for_deletion(dir)
rm(dir, recursive=true)
finally
for dir in cleanup
if ispath(dir)
run(`sudo -n -u $(cfg.user) -- chmod -R ug+rwX $dir/julia`) # make it rwx
Base.Filesystem.prepare_for_deletion(dir)
rm(dir, recursive=true)
end
end
end

Expand Down Expand Up @@ -507,7 +496,7 @@ function report(job::BenchmarkJob, results)
"but no benchmarks were actually executed. Perhaps your tag predicate " *
"contains misspelled tags? cc @$(cfg.admin)")
else
# prepare report + data and push it to report repo
# prepare report + data and push it to report repo
target_url = ""
try
nodelog(cfg, node, "...generating report...")
Expand All @@ -530,40 +519,26 @@ function report(job::BenchmarkJob, results)
rethrow(NanosoldierError("error when preparing/pushing to report repo", err))
end

if haskey(results, "error")
# TODO: throw with backtrace?
if haskey(results, "backtrace")
@error("An exception occurred during job execution",
exception=(results["error"], results["backtrace"]))
else
@error("An exception occurred during job execution",
exception=results["error"])
end
err = results["error"]
err.url = target_url
throw(err)
# determine the job's final status
if job.against !== nothing || haskey(results, "previous_date")
found_regressions = BenchmarkTools.isregression(results["judged"])
state = found_regressions ? "failure" : "success"
status = found_regressions ? "possible performance regressions were detected" :
"no performance regressions were detected"
else
# determine the job's final status
if job.against !== nothing || haskey(results, "previous_date")
found_regressions = BenchmarkTools.isregression(results["judged"])
state = found_regressions ? "failure" : "success"
status = found_regressions ? "possible performance regressions were detected" :
"no performance regressions were detected"
else
state = "success"
status = "successfully executed benchmarks"
end
# reply with the job's final status
reply_status(job, state, status, target_url)
if isempty(target_url)
comment = "[Your benchmark job]($(submission(job).url)) has completed, but " *
"something went wrong when trying to upload the result data. cc @$(cfg.admin)"
else
comment = "[Your benchmark job]($(submission(job).url)) has completed - " *
"$(status). A full report can be found [here]($(target_url))."
end
reply_comment(job, comment)
state = "success"
status = "successfully executed benchmarks"
end
# reply with the job's final status
reply_status(job, state, status, target_url)
if isempty(target_url)
comment = "[Your benchmark job]($(submission(job).url)) has completed, but " *
"something went wrong when trying to upload the result data. cc @$(cfg.admin)"
else
comment = "[Your benchmark job]($(submission(job).url)) has completed - " *
"$(status). A full report can be found [here]($(target_url))."
end
reply_comment(job, comment)
end
end

Expand Down Expand Up @@ -633,31 +608,6 @@ function printreport(io::IO, job::BenchmarkJob, results)
""")
end

# if errors are found, end the report now #
#-----------------------------------------#

if haskey(results, "error")
println(io, """
## Error

The build could not finish due to an error:

```""")

Base.showerror(io, results["error"])
if haskey(results, "backtrace")
Base.show_backtrace(io, results["backtrace"])
end
println(io)

println(io, """
```

Check the logs folder in this directory for more detailed output.
""")
return nothing
end

# print result table #
#--------------------#

Expand Down
91 changes: 27 additions & 64 deletions src/jobs/PkgEvalJob.jl
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,15 @@ function execute_tests!(job::PkgEvalJob, builds::Dict,
try
# NOTE: the merge head only exists in the upstream Julia repository,
# and not in the repository where the pull request originated.
# TODO: do this without reaching into PkgEval internals
install = PkgEval.get_julia_repo("pull/$pr/merge")
julia = "pull/$pr/merge"
nodelog(cfg, node, "Resolved $whichbuild build to Julia merge head of PR $pr")
rm(install; recursive=true)
catch err
isa(err, LibGit2.GitError) || rethrow()
# there might not be a merge commit (e.g. in the case of merge conflicts)
nodelog(cfg, node, "Could not check-out merge head of PR $pr";
error=(err, catch_backtrace()))
end
end
end
Expand Down Expand Up @@ -397,13 +401,10 @@ function Base.run(job::PkgEvalJob)
nodelog(cfg, node, "running tests for $(summary(job))")
execute_tests!(job, builds, buildflags, job.compiled, job.rr, results)
nodelog(cfg, node, "running tests for $(summary(job))")
catch err
results["error"] = NanosoldierError("failed to run tests", err)
results["backtrace"] = catch_backtrace()
finally
PkgEval.purge()
end

PkgEval.purge()

# report results
nodelog(cfg, node, "reporting results for $(summary(job))")
report(job, results)
Expand Down Expand Up @@ -433,7 +434,7 @@ function report(job::PkgEvalJob, results)
reportname = "report.md"
report_md = sprint(io->printreport(io, job, results))
write(joinpath(tmpdir(job), reportname), report_md)
if job.isdaily && !haskey(results, "error")
if job.isdaily
nodelog(cfg, node, "...generating database...")
dbname = "db.json"
open(joinpath(tmpdir(job), dbname), "w") do file
Expand All @@ -449,7 +450,7 @@ function report(job::PkgEvalJob, results)
nodelog(cfg, node, "...moving $(tmpdir(job)) to $(reportdir(job))...")
mkpath(reportdir(job))
mv(tmpdir(job), reportdir(job); force=true)
if job.isdaily && !haskey(results, "error")
if job.isdaily
latest = reportdir(job; latest=true)
islink(latest) && rm(latest)
symlink(datedirname(job.date), latest)
Expand Down Expand Up @@ -495,39 +496,26 @@ function report(job::PkgEvalJob, results)
rethrow(NanosoldierError("error when preparing/pushing to report repo", err))
end

if haskey(results, "error")
# TODO: throw with backtrace?
if haskey(results, "backtrace")
@error("An exception occurred during job execution",
exception=(results["error"], results["backtrace"]))
else
@error("An exception occurred during job execution",
exception=results["error"])
end
err = results["error"]
err.url = target_url
throw(err)
# determine the job's final status
state = results["has_issues"] ? "failure" : "success"
if job.against !== nothing
status = results["has_issues"] ? "possible new issues were detected" :
"no new issues were detected"
else
# determine the job's final status
state = results["has_issues"] ? "failure" : "success"
if job.against !== nothing
status = results["has_issues"] ? "possible new issues were detected" :
"no new issues were detected"
else
status = results["has_issues"] ? "possible issues were detected" :
"no issues were detected"
end
# reply with the job's final status
reply_status(job, state, status, target_url)
if isempty(target_url)
comment = "[Your package evaluation job]($(submission(job).url)) has completed, but " *
"something went wrong when trying to upload the result data. cc @$(cfg.admin)"
else
comment = "[Your package evaluation job]($(submission(job).url)) has completed - " *
"$(status). A full report can be found [here]($(target_url))."
end
reply_comment(job, comment)
status = results["has_issues"] ? "possible issues were detected" :
"no issues were detected"
end

# reply with the job's final status
reply_status(job, state, status, target_url)
if isempty(target_url)
comment = "[Your package evaluation job]($(submission(job).url)) has completed, but " *
"something went wrong when trying to upload the result data. cc @$(cfg.admin)"
else
comment = "[Your package evaluation job]($(submission(job).url)) has completed - " *
"$(status). A full report can be found [here]($(target_url))."
end
reply_comment(job, comment)
end
end

Expand Down Expand Up @@ -604,31 +592,6 @@ function printreport(io::IO, job::PkgEvalJob, results)
""")
end

# if errors are found, end the report now #
#-----------------------------------------#

if haskey(results, "error")
println(io, """
## Error

The build could not finish due to an error:

```""")

Base.showerror(io, results["error"])
if haskey(results, "backtrace")
Base.show_backtrace(io, results["backtrace"])
end
println(io)

println(io, """
```

Check the logs folder in this directory for more detailed output.
""")
return nothing
end

# print summary of tested packages #
#----------------------------------#

Expand Down
21 changes: 8 additions & 13 deletions src/server.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,19 +107,14 @@ function delegate_job(server::Server, job::AbstractJob, node)
remotecall_fetch(run, node, job)
nodelog(server.config, node, "completed job: $(summary(job))")
catch err
err = isa(err, RemoteException) ? err.captured.ex : err
err_str = string(err)
message = "Something went wrong when running [your job]($(submission(job).url)):\n```\n$(err_str)\n```\n"
if isa(err, NanosoldierError)
if isempty(err.url)
message *= "Unfortunately, the logs could not be uploaded.\n"
else
message *= "Logs and partial data can be found [here]($(err.url))\n"
end
end
isempty(server.config.admin) || (message *= "cc @$(server.config.admin)")
nodelog(server.config, node, err_str, error=(err, stacktrace(catch_backtrace())))
reply_status(job, "error", err_str)
# report a simple message to the user (to prevent leaking secrets)
message = "[Your job]($(submission(job).url)) failed."
isempty(server.config.admin) || (message *= " cc @$(server.config.admin)")
reply_comment(job, message)
reply_status(job, "error", "Job failed")

# but put all details in the server log
err_str = sprint(io->showerror(io, err))
nodelog(server.config, node, "failed job: $(summary(job))\n$err_str")
end
end