diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 3c36907414..5454abbb2c 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -384,7 +384,6 @@ steps:
           build.message !~ /\[skip special\]/
         timeout_in_minutes: 45
 
-  # we want to benchmark every commit on the master branch, even if it failed CI
   - wait: ~
     continue_on_failure: true
 
@@ -412,78 +411,36 @@ steps:
       build.message !~ /\[skip docs\]/
     timeout_in_minutes: 15
 
-  - group: ":racehorse: Benchmarks"
-    steps:
-      # benchmarks outside of the master branch don't submit their results,
-      # so they can run on any system in the juliagpu queue.
-      - label: "Benchmarks (dry run)"
-        plugins:
-          - JuliaCI/julia#v1:
-              version: "1.11"
-        command: |
-          julia --project -e '
-            using Pkg
-
-            println("--- :julia: Instantiating project")
-            Pkg.resolve()
-            Pkg.instantiate()
-            Pkg.activate("perf")
-            Pkg.resolve()
-            Pkg.instantiate()
-            push!(LOAD_PATH, @__DIR__)
-
-            println("+++ :julia: Benchmarking")
-            include("perf/runbenchmarks.jl")'
-        agents:
-          queue: "juliagpu"
-          cuda: "*"
-        if: |
-          build.message =~ /\[only benchmarks\]/ ||
-          build.message !~ /\[only/ && !build.pull_request.draft &&
-          build.message !~ /\[skip benchmarks\]/
-        timeout_in_minutes: 30
-
-      # if we will submit results, use the benchmark queue so that we will
-      # be running on the same system each time
-      - label: "Benchmarks on Julia {{matrix.julia}}"
-        plugins:
-          - JuliaCI/julia#v1:
-              version: "{{matrix.julia}}"
-        env:
-          BENCHMARKS: "true"
-          CODESPEED_PROJECT: "$BUILDKITE_PIPELINE_NAME"
-          CODESPEED_BRANCH: "$BUILDKITE_BRANCH"
-          CODESPEED_COMMIT: "$BUILDKITE_COMMIT"
-          CODESPEED_EXECUTABLE: "Julia {{matrix.julia}}"
-        command: |
-          julia --project -e '
-            using Pkg
-            ENV["CODESPEED_ENVIRONMENT"] = ENV["BUILDKITE_AGENT_NAME"]
+  - label: ":racehorse: Benchmarks"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: "1.11"
+    env:
+      BENCHMARKS: "true"
+      CODESPEED_PROJECT: "$BUILDKITE_PIPELINE_NAME"
+      CODESPEED_BRANCH: "$BUILDKITE_BRANCH"
+      CODESPEED_COMMIT: "$BUILDKITE_COMMIT"
+      CODESPEED_EXECUTABLE: "Julia 1.11"
+    command: |
+      julia --project=perf -e '
+        using Pkg
 
-            println("--- :julia: Instantiating project")
-            Pkg.resolve()
-            Pkg.instantiate()
-            Pkg.activate("perf")
-            Pkg.resolve()
-            Pkg.instantiate()
-            push!(LOAD_PATH, @__DIR__)
+        println("--- :julia: Instantiating project")
+        Pkg.develop([PackageSpec(path=pwd())])
 
-            println("+++ :julia: Benchmarking")
-            include("perf/runbenchmarks.jl")'
-        agents:
-          queue: "benchmark"
-          gpu: "rtx2070"
-          cuda: "*"
-        if: |
-          build.branch =~ /^master$$/ && build.message =~ /\[only benchmarks\]/ ||
-          build.branch =~ /^master$$/ && build.message !~ /\[only/ &&
-          build.message !~ /\[skip benchmarks\]/
-        matrix:
-          setup:
-            julia:
-              - "1.11"
-              - "1.11"
-        timeout_in_minutes: 30
+        println("+++ :julia: Benchmarking")
+        include("perf/runbenchmarks.jl")'
+    artifact_paths:
+      - "benchmarkresults.json"
+    agents:
+      queue: "benchmark"
+      gpu: "rtx2070"
+      cuda: "*"
+    if: |
+      build.message =~ /\[only benchmarks\]/ ||
+      build.message !~ /\[only/ && !build.pull_request.draft &&
+      build.message !~ /\[skip benchmarks\]/
+    timeout_in_minutes: 30
 
 env:
   JULIA_PKG_SERVER_REGISTRY_PREFERENCE: "eager" # OK to downloading JLLs from GitHub
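The new step develops the checked-out package into the `perf` environment (`--project=perf` plus `Pkg.develop`) instead of resolving and instantiating two nested projects, and it uploads `benchmarkresults.json` via `artifact_paths` for the GitHub workflow below to pick up. Roughly the same run can be reproduced locally with the sketch below; it assumes the working directory is the root of a CUDA.jl checkout on a machine with a CUDA-capable GPU, and the explicit `activate`/`instantiate` calls stand in for the pipeline's `--project=perf` flag:

```julia
using Pkg

# Mirror the Buildkite command: use the perf/ environment and develop the
# package under test (the current checkout) into it.
Pkg.activate("perf")
Pkg.develop(PackageSpec(path=pwd()))
Pkg.instantiate()

# Run the suite; it writes benchmarkresults.json into the current directory,
# which is the file the pipeline uploads as an artifact.
include("perf/runbenchmarks.jl")
```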
diff --git a/.github/workflows/Benchmark.yml b/.github/workflows/Benchmark.yml
new file mode 100644
index 0000000000..9eb855c52b
--- /dev/null
+++ b/.github/workflows/Benchmark.yml
@@ -0,0 +1,66 @@
+name: Benchmarks
+permissions:
+  statuses: read # find Buildkite URL from PR status
+  contents: write # update benchmark contents in gh-pages branch
+  pull-requests: write # comment on PR with benchmark results
+  deployments: write # deploy GitHub pages website
+
+on:
+  pull_request_target:
+    branches:
+      - master
+    paths:
+      - "src/**/*"
+      - "lib/**/*"
+      - "ext/**/*"
+      - "perf/**/*"
+      - ".buildkite/**/*"
+      - "Project.toml"
+  push:
+    branches:
+      - master
+    paths:
+      - "src/**/*"
+      - "lib/**/*"
+      - "ext/**/*"
+      - "perf/**/*"
+      - ".buildkite/**/*"
+      - "Project.toml"
+
+jobs:
+  benchmark:
+    if: |
+      contains(github.event.head_commit.message, '[only benchmarks]') ||
+      !contains(github.event.head_commit.message, '[only') &&
+      !contains(github.event.head_commit.message, '[skip benchmarks]') &&
+      !github.event.pull_request.draft
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download Buildkite Artifacts
+        id: download
+        uses: EnricoMi/download-buildkite-artifact-action@v1
+        with:
+          buildkite_token: ${{ secrets.BUILDKITE_TOKEN }}
+          ignore_build_states: blocked,canceled,skipped,not_run
+          ignore_job_states: timed_out,failed
+          output_path: artifacts
+
+      - name: Locate Benchmarks Artifact
+        id: locate
+        if: ${{ steps.download.outputs.download-state == 'success' }}
+        run: echo "path=$(find artifacts -type f -name benchmarkresults.json 2>/dev/null)" >> $GITHUB_OUTPUT
+
+      - name: Upload Benchmark Results
+        if: ${{ steps.locate.outputs.path != '' }}
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: CUDA.jl Benchmarks
+          tool: "julia"
+          output-file-path: ${{ steps.locate.outputs.path }}
+          benchmark-data-dir-path: "bench"
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          comment-always: ${{ github.event_name == 'pull_request' }}
+          summary-always: true
+          alert-threshold: "125%"
+          fail-on-alert: false
+          auto-push: ${{ github.event_name != 'pull_request' }}
diff --git a/README.md b/README.md
index 66cc75ff69..3676c61aa2 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 *CUDA programming in Julia*
 
-[![][doi-img]][doi-url] [![][docs-stable-img]][docs-stable-url] [![][docs-dev-img]][docs-dev-url] [![][buildkite-img]][buildkite-url] [![][codecov-img]][codecov-url] [![][codespeed-trend-img]][codespeed-trend-url] [![][codespeed-chart-img]][codespeed-chart-url]
+[![][doi-img]][doi-url] [![][docs-stable-img]][docs-stable-url] [![][docs-dev-img]][docs-dev-url] [![][buildkite-img]][buildkite-url] [![][codecov-img]][codecov-url] [![][benchmark-img]][benchmark-url]
 
 [doi-img]: https://zenodo.org/badge/doi/10.1109/TPDS.2018.2872064.svg
 [doi-url]: https://ieeexplore.ieee.org/abstract/document/8471188
@@ -19,11 +19,8 @@
 [codecov-img]: https://codecov.io/gh/JuliaGPU/CUDA.jl/branch/master/graph/badge.svg
 [codecov-url]: https://codecov.io/gh/JuliaGPU/CUDA.jl
 
-[codespeed-chart-img]: https://img.shields.io/badge/benchmarks-Chart-yellowgreen
-[codespeed-chart-url]: https://speed.juliagpu.org/timeline/#/?exe=9,11&env=1&base=none&ben=grid&revs=50
-
-[codespeed-trend-img]: https://img.shields.io/badge/benchmarks-Trend-yellowgreen
-[codespeed-trend-url]: https://speed.juliagpu.org/changes/?exe=9&env=1&tre=50
+[benchmark-img]: https://img.shields.io/badge/benchmarks-Chart-yellowgreen
+[benchmark-url]: https://cuda.juliagpu.org/bench/
 
 The CUDA.jl package is the main programming interface for working with NVIDIA CUDA GPUs
 using Julia. It features a user-friendly array abstraction, a compiler for writing CUDA
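The workflow above downloads the Buildkite artifact and hands it to benchmark-action/github-action-benchmark with `tool: "julia"`, which reads the BenchmarkTools-formatted JSON produced by `BenchmarkTools.save` (see the `perf/runbenchmarks.jl` change below). A quick local sanity check of such an artifact might look like the sketch below; the file name matches the artifact above, and the loop exists purely for inspection:

```julia
using BenchmarkTools

# BenchmarkTools.save stores its arguments as a vector, so take the first entry.
results = BenchmarkTools.load("benchmarkresults.json")[1]

# The file contains per-benchmark median estimates organized in nested
# BenchmarkGroups; leaves() flattens them into (key path, estimate) pairs.
for (name, estimate) in leaves(results)
    println(join(name, "/"), ": ", estimate)
end
```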
diff --git a/perf/runbenchmarks.jl b/perf/runbenchmarks.jl
index 5969dcd9b7..8d9f9d3a9c 100644
--- a/perf/runbenchmarks.jl
+++ b/perf/runbenchmarks.jl
@@ -7,13 +7,6 @@ using BenchmarkTools
 using StableRNGs
 rng = StableRNG(123)
 
-# we only submit results when running on the master branch
-real_run = get(ENV, "CODESPEED_BRANCH", nothing) == "master"
-if real_run
-    # to find untuned benchmarks
-    BenchmarkTools.DEFAULT_PARAMETERS.evals = 0
-end
-
 # convenience macro to create a benchmark that requires synchronizing the GPU
 macro async_benchmarkable(ex...)
     quote
@@ -28,21 +21,17 @@ latency_results = include("latency.jl")
 
 SUITE = BenchmarkGroup()
 
-# NOTE: don't use spaces in benchmark names (tobami/codespeed#256)
-
 include("cuda.jl")
 include("kernel.jl")
 include("array.jl")
 
-if real_run
-    @info "Preparing main benchmarks"
-    warmup(SUITE; verbose=false)
-    tune!(SUITE)
+@info "Preparing main benchmarks"
+warmup(SUITE; verbose=false)
+tune!(SUITE)
 
-    # reclaim memory that might have been used by the tuning process
-    GC.gc(true)
-    CUDA.reclaim()
-end
+# reclaim memory that might have been used by the tuning process
+GC.gc(true)
+CUDA.reclaim()
 
 # benchmark groups that aren't part of the suite
 addgroup!(SUITE, "integration")
@@ -60,69 +49,5 @@ integration_results["cudadevrt"] = include("cudadevrt.jl")
 results["latency"] = latency_results
 results["integration"] = integration_results
 
-println(results)
-
-
-## comparison
-
 # write out the results
-BenchmarkTools.save(joinpath(@__DIR__, "results.json"), results)
-
-# compare against previous results
-# TODO: store these results so that we can compare when benchmarking PRs
-reference_path = joinpath(@__DIR__, "reference.json")
-if ispath(reference_path)
-    reference = BenchmarkTools.load(reference_path)[1]
-    comparison = judge(minimum(results), minimum(reference))
-
-    println("Improvements:")
-    println(improvements(comparison))
-
-    println("Regressions:")
-    println(regressions(comparison))
-end
-
-
-## submission
-
-using JSON, HTTP
-
-if real_run
-    @info "Submitting to Codespeed..."
-
-    basedata = Dict(
-        "branch" => ENV["CODESPEED_BRANCH"],
-        "commitid" => ENV["CODESPEED_COMMIT"],
-        "project" => ENV["CODESPEED_PROJECT"],
-        "environment" => ENV["CODESPEED_ENVIRONMENT"],
-        "executable" => ENV["CODESPEED_EXECUTABLE"]
-    )
-
-    # convert nested groups of benchmark to flat dictionaries of results
-    flat_results = []
-    function flatten(results, prefix="")
-        for (key,value) in results
-            if value isa BenchmarkGroup
-                flatten(value, "$prefix$key/")
-            else
-                @assert value isa BenchmarkTools.Trial
-
-                # codespeed reports maxima, but those are often very noisy.
-                # get rid of measurements that unnecessarily skew the distribution.
-                rmskew!(value)
-
-                push!(flat_results,
-                    Dict(basedata...,
-                        "benchmark" => "$prefix$key",
-                        "result_value" => median(value).time / 1e9,
-                        "min" => minimum(value).time / 1e9,
-                        "max" => maximum(value).time / 1e9))
-            end
-        end
-    end
-    flatten(results)
-
-    HTTP.post("$(ENV["CODESPEED_SERVER"])/result/add/json/",
-              ["Content-Type" => "application/x-www-form-urlencoded"],
-              HTTP.URIs.escapeuri(Dict("json" => JSON.json(flat_results))))
-end
+BenchmarkTools.save("benchmarkresults.json", median(results))
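With the Codespeed submission and the local `reference.json` comparison gone, regression detection moves to the GitHub Action. For an occasional manual comparison between two saved runs, BenchmarkTools can still be used directly on the stored medians; this is a sketch, with both file names as placeholders for locally downloaded artifacts:

```julia
using BenchmarkTools

# Two runs produced by `BenchmarkTools.save("benchmarkresults.json", median(results))`.
baseline  = BenchmarkTools.load("baseline.json")[1]
candidate = BenchmarkTools.load("benchmarkresults.json")[1]

# judge() classifies each benchmark as improvement, regression, or invariant
# using BenchmarkTools' default time tolerances.
comparison = judge(candidate, baseline)

println("Improvements:")
println(improvements(comparison))

println("Regressions:")
println(regressions(comparison))
```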