From 3812e3bbd964fefd673ee4017707815da0237445 Mon Sep 17 00:00:00 2001 From: JRPan <25518778+JRPan@users.noreply.github.com> Date: Mon, 24 Jun 2024 22:49:41 -0700 Subject: [PATCH] Add Weekly tests (#311) * Adding weekly test * 1 * Update to checkoutv4 * Use 11.7 * typo * fix hw stats dir * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * 1 * using rodinia & ubench only for now * try again with new hw_run * Add schedule trigger * correct cron syntax * run long tests on merge queue only * run shorts first, then long tests in merged queue only --- .github/workflows/long-tests.yml | 71 ++++++++++++++++- .github/workflows/short-tests.yml | 28 ++++++- .github/workflows/tracer-tool.yml | 77 ------------------ .github/workflows/weekly.yml | 128 ++++++++++++++++++++++++++++++ 4 files changed, 221 insertions(+), 83 deletions(-) delete mode 100644 .github/workflows/tracer-tool.yml create mode 100644 .github/workflows/weekly.yml diff --git a/.github/workflows/long-tests.yml b/.github/workflows/long-tests.yml index 9c770899b..7007a81ed 100644 --- a/.github/workflows/long-tests.yml +++ b/.github/workflows/long-tests.yml @@ -6,7 +6,8 @@ name: Long Tests on: # Triggers the workflow on push or pull request events but only for the mydev branch push: - pull_request: + # pull_request: + merge_group: # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -20,7 +21,7 @@ jobs: run: shell: bash steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Environment run: | rm -rf env-setup @@ -81,7 +82,7 @@ jobs: run: shell: bash steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Environment run: | rm -rf env-setup @@ -107,4 +108,66 @@ jobs: ./gpu-app-collection/get_regression_data.sh ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT - ./util/job_launching/monitor_func_test.py -v -s stats-per-app-ptx.csv -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT \ No newline at end of file + ./util/job_launching/monitor_func_test.py -v -s stats-per-app-ptx.csv -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + Tracer-Tool: + if: github.repository == 'accel-sim/accel-sim-framework' + runs-on: tgrogers-gpu01 + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@v4 + - name: Setup Environment + run: | + rm -rf env-setup + git clone git@github.com:purdue-aalp/env-setup.git + cd env-setup + git checkout cluster-ubuntu + - name: Build Accel-Sim + run: | + source ./env-setup/11.7.0_env_setup.sh + rm -rf ./gpu-simulator/gpgpu-sim + source ./gpu-simulator/setup_environment.sh + make clean -C gpu-simulator + make -j -C gpu-simulator + - name: test-prebuilt-traces + run: | + source ./env-setup/11.7.0_env_setup.sh + source ./gpu-simulator/setup_environment.sh + ./get-accel-sim-traces.py -a tesla-v100/rodinia_2.0-ft + cd hw_run; tar -xzvf rodinia_2.0-ft.tgz; cd - + ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T ./hw_run/ -N rodinia_2.0-ft-online-$$ + ./util/job_launching/monitor_func_test.py -N rodinia_2.0-ft-online-$$ -v + rm -rf hw_run + rm -rf sim_run_11.0 + - name: Build Tracer + run: | + source ./env-setup/11.7.0_env_setup.sh + ./util/tracer_nvbit/install_nvbit.sh + make clean -C ./util/tracer_nvbit/ + make -C ./util/tracer_nvbit/ + - name: rodinia_2.0-ft-build + run: | + source ./env-setup/11.7.0_env_setup.sh + rm -rf ./gpu-app-collection/ + git clone git@github.com:accel-sim/gpu-app-collection.git + source ./gpu-app-collection/src/setup_environment + ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/ + make -C ./gpu-app-collection/src rodinia_2.0-ft + - name: generate-rodinia_2.0-ft-traces + run: | + source ./env-setup/11.7.0_env_setup.sh + source ./gpu-app-collection/src/setup_environment + rm -rf ./hw_run/ + ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft -D 7 + - name: generate-rodinia_2.0-ft-hw_stats + run: | + source ./env-setup/11.7.0_env_setup.sh + source ./gpu-app-collection/src/setup_environment + ./util/hw_stats/run_hw.py -B rodinia_2.0-ft -D 7 + - name: test-new-traces + run: | + source ./env-setup/11.7.0_env_setup.sh + source ./gpu-simulator/setup_environment.sh + ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T ./hw_run/traces/device-7/ -N rodinia_2.0-ft-$$ + ./util/job_launching/monitor_func_test.py -I -v -s rodinia-stats-per-app.csv -N rodinia_2.0-ft-$$ \ No newline at end of file diff --git a/.github/workflows/short-tests.yml b/.github/workflows/short-tests.yml index 1f1270fbb..c2fbb13fb 100644 --- a/.github/workflows/short-tests.yml +++ b/.github/workflows/short-tests.yml @@ -13,7 +13,7 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - short-tests: + SASS-Simulation: runs-on: ubuntu-latest container: image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7 @@ -22,6 +22,30 @@ jobs: # Steps represent a sequence of tasks that will be executed as part of the job steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Run Simulation run: /bin/bash $GITHUB_WORKSPACE/short-tests.sh + PTX-Simulation: + runs-on: ubuntu-latest + container: + image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7 + # env: + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v4 + - name: Run Simulation + run: echo "skipped SASS-Simulation. Will perform in merge queue" + Tracer-Tool: + runs-on: ubuntu-latest + container: + image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7 + # env: + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v4 + - name: Run Simulation + run: echo "skipped SASS-Simulation. Will perform in merge queue" \ No newline at end of file diff --git a/.github/workflows/tracer-tool.yml b/.github/workflows/tracer-tool.yml deleted file mode 100644 index 1563529cc..000000000 --- a/.github/workflows/tracer-tool.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This is a basic workflow to help you get started with Actions - -name: Tracer Tool - -# Controls when the workflow will run -on: - # Triggers the workflow on push or pull request events but only for the mydev branch - push: - pull_request: - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - Tracer-Tool: - if: github.repository == 'accel-sim/accel-sim-framework' - runs-on: tgrogers-gpu01 - defaults: - run: - shell: bash - steps: - - uses: actions/checkout@v2 - - name: Setup Environment - run: | - rm -rf env-setup - git clone git@github.com:purdue-aalp/env-setup.git - cd env-setup - git checkout cluster-ubuntu - - name: Build Accel-Sim - run: | - source ./env-setup/11.7.0_env_setup.sh - rm -rf ./gpu-simulator/gpgpu-sim - source ./gpu-simulator/setup_environment.sh - make clean -C gpu-simulator - make -j -C gpu-simulator - - name: test-prebuilt-traces - run: | - source ./env-setup/11.7.0_env_setup.sh - source ./gpu-simulator/setup_environment.sh - ./get-accel-sim-traces.py -a tesla-v100/rodinia_2.0-ft - cd hw_run; tar -xzvf rodinia_2.0-ft.tgz; cd - - ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T ./hw_run/ -N rodinia_2.0-ft-online-$$ - ./util/job_launching/monitor_func_test.py -N rodinia_2.0-ft-online-$$ -v - rm -rf hw_run - rm -rf sim_run_11.0 - - name: Build Tracer - run: | - source ./env-setup/11.7.0_env_setup.sh - ./util/tracer_nvbit/install_nvbit.sh - make clean -C ./util/tracer_nvbit/ - make -C ./util/tracer_nvbit/ - - name: rodinia_2.0-ft-build - run: | - source ./env-setup/11.7.0_env_setup.sh - rm -rf ./gpu-app-collection/ - git clone git@github.com:accel-sim/gpu-app-collection.git - source ./gpu-app-collection/src/setup_environment - ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/ - make -C ./gpu-app-collection/src rodinia_2.0-ft - - name: generate-rodinia_2.0-ft-traces - run: | - source ./env-setup/11.7.0_env_setup.sh - source ./gpu-app-collection/src/setup_environment - rm -rf ./hw_run/ - ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft -D 7 - - name: generate-rodinia_2.0-ft-hw_stats - run: | - source ./env-setup/11.7.0_env_setup.sh - source ./gpu-app-collection/src/setup_environment - ./util/hw_stats/run_hw.py -B rodinia_2.0-ft -D 7 - - name: test-new-traces - run: | - source ./env-setup/11.7.0_env_setup.sh - source ./gpu-simulator/setup_environment.sh - ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T ./hw_run/traces/device-7/ -N rodinia_2.0-ft-$$ - ./util/job_launching/monitor_func_test.py -I -v -s rodinia-stats-per-app.csv -N rodinia_2.0-ft-$$ \ No newline at end of file diff --git a/.github/workflows/weekly.yml b/.github/workflows/weekly.yml new file mode 100644 index 000000000..9b7b0f62e --- /dev/null +++ b/.github/workflows/weekly.yml @@ -0,0 +1,128 @@ +name: Weekly Tests + +on: + workflow_dispatch: # manually dispatch + # push: + schedule: + - cron: '0 20 * * FRI' # 8:00 PM every Friday + +jobs: + Last-Commit: + if: github.repository == 'accel-sim/accel-sim-framework' + runs-on: tgrogers-raid + defaults: + run: + shell: bash + outputs: + TRACER_COUNT: ${{ steps.Test-for-commit.outputs.TRACER_COUNT }} + ACCEL_SIM_COUNT: ${{ steps.Test-for-commit.outputs.ACCEL_SIM_COUNT }} + GPGPUSIM_COUNT: ${{ steps.Test-for-commit.outputs.GPGPUSIM_COUNT }} + steps: + - uses: actions/checkout@v4 + with: + ref: dev + fetch-depth: 0 + - name: Setup Environment + run: | + rm -rf env-setup + git clone git@github.com:purdue-aalp/env-setup.git + cd env-setup + git checkout cluster-ubuntu + - name: Test For Commit + id: Test-for-commit + run: | + source ./env-setup/11.7.0_env_setup.sh + source ./gpu-simulator/setup_environment.sh + echo "ACCEL_SIM_COUNT=$(git log --oneline --since '1 week ago' | wc -l)" >> $GITHUB_OUTPUT + echo "ACCEL_SIM_COUNT=$(git log --oneline --since '1 week ago' | wc -l)" + echo "TRACER_COUNT=$(git log --oneline --since '1 week ago' util/tracer_nvbit | wc -l)" >> $GITHUB_OUTPUT + echo "TRACER_COUNT=$(git log --oneline --since '1 week ago' util/tracer_nvbit | wc -l)" + cd gpu-simulator/gpgpu-sim + echo "GPGPUSIM_COUNT=$(git log --oneline --since '1 week ago' | wc -l)" >> $GITHUB_OUTPUT + echo "GPGPUSIM_COUNT=$(git log --oneline --since '1 week ago' | wc -l)" + Tracer-Weekly: + needs: Last-Commit + if: | + github.repository == 'accel-sim/accel-sim-framework' && + needs.Last-Commit.outputs.TRACER_COUNT > 0 + runs-on: tgrogers-gpu01 + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@v4 + with: + ref: dev + - name: Setup Environment + run: | + rm -rf env-setup + git clone git@github.com:purdue-aalp/env-setup.git + cd env-setup + git checkout cluster-ubuntu + - name: Build Tracer + run: | + source ./env-setup/11.7.0_env_setup.sh + ./util/tracer_nvbit/install_nvbit.sh + make clean -C ./util/tracer_nvbit/ + make -C ./util/tracer_nvbit/ + - name: build applications + run: | + source ./env-setup/11.7.0_env_setup.sh + export PATH=/home/tgrogers-raid/a/common/python2:$PATH + rm -rf ./gpu-app-collection/ + git clone git@github.com:accel-sim/gpu-app-collection.git + source ./gpu-app-collection/src/setup_environment + ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/ + make -j8 -C ./gpu-app-collection/src rodinia-3.1 + make -j8 -C ./gpu-app-collection/src GPU_Microbenchmark + # make -j8 -C ./gpu-app-collection/src Deepbench_nvidia + # make -j8 -C ./gpu-app-collection/src parboil + # make -j8 -C ./gpu-app-collection/src polybench + # make -j8 -C ./gpu-app-collection/src cutlass + - name: generate traces + run: | + source ./env-setup/11.7.0_env_setup.sh + source ./gpu-app-collection/src/setup_environment + rm -rf ./hw_run/ + rm -rf /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces + mkdir -p /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces + ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces ./hw_run + ./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark -D 7 + # ./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -D 7 + SASS-Weekly: + needs: [Last-Commit, Tracer-Weekly] + if: | + github.repository == 'accel-sim/accel-sim-framework' && + (needs.Last-Commit.outputs.ACCEL_SIM_COUNT > 0 || + needs.Last-Commit.outputs.GPGPUSIM_COUNT > 0) && + needs.Tracer-Weekly.result != 'failure' && + always() + runs-on: tgrogers-raid + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@v4 + with: + ref: dev + - name: Setup Environment + run: | + rm -rf env-setup + git clone git@github.com:purdue-aalp/env-setup.git + cd env-setup + git checkout cluster-ubuntu + - name: Build Accel-Sim + run: | + source ./env-setup/11.7.0_env_setup.sh + rm -rf ./gpu-simulator/gpgpu-sim + source ./gpu-simulator/setup_environment.sh + make clean -C gpu-simulator + make -j -C gpu-simulator + - name: run SASS + run: | + source ./env-setup/11.7.0_env_setup.sh + source ./gpu-simulator/setup_environment.sh + ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces ./hw_run + # ./util/job_launching/run_simulations.py -B rodinia-3.1,GPU_Microbenchmark,sdk-4.2-scaled,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -C QV100-SASS-5B_INSN -T ./hw_run/traces/device-7/11.7 -N weekly-$$ -M 70G + ./util/job_launching/run_simulations.py -B rodinia-3.1,GPU_Microbenchmark -C QV100-SASS-5B_INSN -T ./hw_run/traces/device-7/11.7 -N weekly-$$ -M 70G + ./util/job_launching/monitor_func_test.py -T 12 -S 1800 -I -v -s weekly-stats-per-app.csv -N weekly-$$