From 3812e3bbd964fefd673ee4017707815da0237445 Mon Sep 17 00:00:00 2001
From: JRPan <25518778+JRPan@users.noreply.github.com>
Date: Mon, 24 Jun 2024 22:49:41 -0700
Subject: [PATCH] Add Weekly tests  (#311)

* Adding weekly test

* 1

* Update to checkoutv4

* Use 11.7

* typo

* fix hw stats dir

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* 1

* using rodinia & ubench only for now

* try again with new hw_run

* Add schedule trigger

* correct cron syntax

* run long tests on merge queue only

* run shorts first, then long tests in merged queue only
---
 .github/workflows/long-tests.yml  |  71 ++++++++++++++++-
 .github/workflows/short-tests.yml |  28 ++++++-
 .github/workflows/tracer-tool.yml |  77 ------------------
 .github/workflows/weekly.yml      | 128 ++++++++++++++++++++++++++++++
 4 files changed, 221 insertions(+), 83 deletions(-)
 delete mode 100644 .github/workflows/tracer-tool.yml
 create mode 100644 .github/workflows/weekly.yml

diff --git a/.github/workflows/long-tests.yml b/.github/workflows/long-tests.yml
index 9c770899b..7007a81ed 100644
--- a/.github/workflows/long-tests.yml
+++ b/.github/workflows/long-tests.yml
@@ -6,7 +6,8 @@ name: Long Tests
 on:
   # Triggers the workflow on push or pull request events but only for the mydev branch
   push:
-  pull_request:
+  # pull_request:  # disabled: PR changes get the long tests through merge_group below
+  merge_group:
 
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
@@ -20,7 +21,7 @@ jobs:
       run:
         shell: bash
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Setup Environment
         run: |
           rm -rf env-setup
@@ -81,7 +82,7 @@ jobs:
       run:
         shell: bash
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Setup Environment
         run: |
           rm -rf env-setup
@@ -107,4 +108,66 @@ jobs:
           ./gpu-app-collection/get_regression_data.sh
 
           ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
-          ./util/job_launching/monitor_func_test.py -v -s stats-per-app-ptx.csv -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
\ No newline at end of file
+          ./util/job_launching/monitor_func_test.py -v -s stats-per-app-ptx.csv -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
+  Tracer-Tool:  # builds the simulator and tracer, then simulates prebuilt and freshly generated rodinia_2.0-ft traces
+    if: github.repository == 'accel-sim/accel-sim-framework'  # only run in the upstream repository
+    runs-on: tgrogers-gpu01
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Environment  # fresh clone of env-setup, switched to its cluster-ubuntu branch
+        run: |
+          rm -rf env-setup
+          git clone git@github.com:purdue-aalp/env-setup.git 
+          cd env-setup 
+          git checkout cluster-ubuntu
+      - name: Build Accel-Sim  # deletes gpu-simulator/gpgpu-sim, sources the setup script, then clean-builds gpu-simulator
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          rm -rf ./gpu-simulator/gpgpu-sim
+          source ./gpu-simulator/setup_environment.sh
+          make clean -C gpu-simulator
+          make -j -C gpu-simulator
+      - name: test-prebuilt-traces  # NOTE(review): cleanup removes sim_run_11.0 although the env script is 11.7.0 - confirm the directory name
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          source ./gpu-simulator/setup_environment.sh
+          ./get-accel-sim-traces.py -a tesla-v100/rodinia_2.0-ft
+          cd hw_run; tar -xzvf rodinia_2.0-ft.tgz; cd -
+          ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T ./hw_run/ -N rodinia_2.0-ft-online-$$
+          ./util/job_launching/monitor_func_test.py -N rodinia_2.0-ft-online-$$ -v
+          rm -rf hw_run
+          rm -rf sim_run_11.0
+      - name: Build Tracer  # runs install_nvbit.sh, then clean-builds util/tracer_nvbit
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          ./util/tracer_nvbit/install_nvbit.sh
+          make clean -C ./util/tracer_nvbit/
+          make -C ./util/tracer_nvbit/
+      - name: rodinia_2.0-ft-build  # re-clones gpu-app-collection, links the shared data_dirs, builds the rodinia_2.0-ft target
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          rm -rf ./gpu-app-collection/
+          git clone git@github.com:accel-sim/gpu-app-collection.git
+          source ./gpu-app-collection/src/setup_environment
+          ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/
+          make -C ./gpu-app-collection/src rodinia_2.0-ft
+      - name: generate-rodinia_2.0-ft-traces  # starts from an empty ./hw_run; -D 7 presumably selects GPU device 7 - confirm
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          source ./gpu-app-collection/src/setup_environment
+          rm -rf ./hw_run/
+          ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft -D 7
+      - name: generate-rodinia_2.0-ft-hw_stats  # runs run_hw.py for the same suite with the same -D 7
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          source ./gpu-app-collection/src/setup_environment
+          ./util/hw_stats/run_hw.py -B rodinia_2.0-ft -D 7
+      - name: test-new-traces  # simulates the traces under ./hw_run/traces/device-7/ with the QV100-SASS config
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          source ./gpu-simulator/setup_environment.sh
+          ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T ./hw_run/traces/device-7/ -N rodinia_2.0-ft-$$
+          ./util/job_launching/monitor_func_test.py -I -v -s rodinia-stats-per-app.csv -N rodinia_2.0-ft-$$
\ No newline at end of file
diff --git a/.github/workflows/short-tests.yml b/.github/workflows/short-tests.yml
index 1f1270fbb..c2fbb13fb 100644
--- a/.github/workflows/short-tests.yml
+++ b/.github/workflows/short-tests.yml
@@ -13,7 +13,7 @@ on:
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  short-tests:
+  SASS-Simulation:
     runs-on: ubuntu-latest
     container:
       image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
@@ -22,6 +22,30 @@ jobs:
     # Steps represent a sequence of tasks that will be executed as part of the job
     steps:
       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Run Simulation
         run: /bin/bash $GITHUB_WORKSPACE/short-tests.sh
+  PTX-Simulation:
+    runs-on: ubuntu-latest
+    container:
+      image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
+    # Placeholder job: it runs no simulation and only prints a notice.
+    # NOTE(review): presumably kept so a check named PTX-Simulation still
+    # reports on pull requests while the real run happens in the merge queue.
+    steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - uses: actions/checkout@v4
+      - name: Run Simulation
+        run: echo "skipped PTX-Simulation. Will perform in merge queue"
+  Tracer-Tool:
+    runs-on: ubuntu-latest
+    container:
+      image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
+    # Placeholder job: it builds and traces nothing and only prints a notice.
+    # NOTE(review): presumably kept so a check named Tracer-Tool still
+    # reports on pull requests while the real run happens in the merge queue.
+    steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - uses: actions/checkout@v4
+      - name: Run Simulation
+        run: echo "skipped Tracer-Tool. Will perform in merge queue"
\ No newline at end of file
diff --git a/.github/workflows/tracer-tool.yml b/.github/workflows/tracer-tool.yml
deleted file mode 100644
index 1563529cc..000000000
--- a/.github/workflows/tracer-tool.yml
+++ /dev/null
@@ -1,77 +0,0 @@
-# This is a basic workflow to help you get started with Actions
-
-name: Tracer Tool
-
-# Controls when the workflow will run
-on:
-  # Triggers the workflow on push or pull request events but only for the mydev branch
-  push:
-  pull_request:
-
-  # Allows you to run this workflow manually from the Actions tab
-  workflow_dispatch:
-
-# A workflow run is made up of one or more jobs that can run sequentially or in parallel
-jobs:
-  Tracer-Tool:
-    if: github.repository == 'accel-sim/accel-sim-framework'
-    runs-on: tgrogers-gpu01
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - uses: actions/checkout@v2
-      - name: Setup Environment
-        run: |
-          rm -rf env-setup
-          git clone git@github.com:purdue-aalp/env-setup.git 
-          cd env-setup 
-          git checkout cluster-ubuntu
-      - name: Build Accel-Sim
-        run: |
-          source ./env-setup/11.7.0_env_setup.sh
-          rm -rf ./gpu-simulator/gpgpu-sim
-          source ./gpu-simulator/setup_environment.sh
-          make clean -C gpu-simulator
-          make -j -C gpu-simulator
-      - name: test-prebuilt-traces
-        run: |
-          source ./env-setup/11.7.0_env_setup.sh
-          source ./gpu-simulator/setup_environment.sh
-          ./get-accel-sim-traces.py -a tesla-v100/rodinia_2.0-ft
-          cd hw_run; tar -xzvf rodinia_2.0-ft.tgz; cd -
-          ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T ./hw_run/ -N rodinia_2.0-ft-online-$$
-          ./util/job_launching/monitor_func_test.py -N rodinia_2.0-ft-online-$$ -v
-          rm -rf hw_run
-          rm -rf sim_run_11.0
-      - name: Build Tracer
-        run: |
-          source ./env-setup/11.7.0_env_setup.sh
-          ./util/tracer_nvbit/install_nvbit.sh
-          make clean -C ./util/tracer_nvbit/
-          make -C ./util/tracer_nvbit/
-      - name: rodinia_2.0-ft-build
-        run: |
-          source ./env-setup/11.7.0_env_setup.sh
-          rm -rf ./gpu-app-collection/
-          git clone git@github.com:accel-sim/gpu-app-collection.git
-          source ./gpu-app-collection/src/setup_environment
-          ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/
-          make -C ./gpu-app-collection/src rodinia_2.0-ft
-      - name: generate-rodinia_2.0-ft-traces
-        run: |
-          source ./env-setup/11.7.0_env_setup.sh
-          source ./gpu-app-collection/src/setup_environment
-          rm -rf ./hw_run/
-          ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft -D 7
-      - name: generate-rodinia_2.0-ft-hw_stats
-        run: |
-          source ./env-setup/11.7.0_env_setup.sh
-          source ./gpu-app-collection/src/setup_environment
-          ./util/hw_stats/run_hw.py -B rodinia_2.0-ft -D 7
-      - name: test-new-traces
-        run: |
-          source ./env-setup/11.7.0_env_setup.sh
-          source ./gpu-simulator/setup_environment.sh
-          ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T ./hw_run/traces/device-7/ -N rodinia_2.0-ft-$$
-          ./util/job_launching/monitor_func_test.py -I -v -s rodinia-stats-per-app.csv -N rodinia_2.0-ft-$$
\ No newline at end of file
diff --git a/.github/workflows/weekly.yml b/.github/workflows/weekly.yml
new file mode 100644
index 000000000..9b7b0f62e
--- /dev/null
+++ b/.github/workflows/weekly.yml
@@ -0,0 +1,128 @@
+name: Weekly Tests
+
+on:
+  workflow_dispatch:  # manually dispatch
+  # push:
+  schedule:
+    - cron: '0 20 * * FRI'  # 20:00 UTC every Friday (GitHub evaluates cron schedules in UTC)
+
+jobs:
+  Last-Commit:  # counts last week's commits so the jobs below can skip weeks without changes
+    if: github.repository == 'accel-sim/accel-sim-framework'  # only run in the upstream repository
+    runs-on: tgrogers-raid
+    defaults:
+      run:
+        shell: bash
+    outputs:  # commit counts, read by dependent jobs as needs.Last-Commit.outputs.*
+      TRACER_COUNT: ${{ steps.Test-for-commit.outputs.TRACER_COUNT }}
+      ACCEL_SIM_COUNT: ${{ steps.Test-for-commit.outputs.ACCEL_SIM_COUNT }}
+      GPGPUSIM_COUNT: ${{ steps.Test-for-commit.outputs.GPGPUSIM_COUNT }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: dev
+          fetch-depth: 0  # full history, needed by the git log --since queries below
+      - name: Setup Environment
+        run: |
+          rm -rf env-setup
+          git clone git@github.com:purdue-aalp/env-setup.git
+          cd env-setup
+          git checkout cluster-ubuntu
+      - name: Test For Commit
+        id: Test-for-commit
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          source ./gpu-simulator/setup_environment.sh
+          # Each count is computed once; tee prints it to the log and appends it to $GITHUB_OUTPUT.
+          echo "ACCEL_SIM_COUNT=$(git log --oneline --since '1 week ago' | wc -l)" | tee -a "$GITHUB_OUTPUT"
+          # Same window, restricted to commits that touched util/tracer_nvbit.
+          echo "TRACER_COUNT=$(git log --oneline --since '1 week ago' util/tracer_nvbit | wc -l)" | tee -a "$GITHUB_OUTPUT"
+          cd gpu-simulator/gpgpu-sim
+          # Same query again, now issued from inside gpu-simulator/gpgpu-sim.
+          echo "GPGPUSIM_COUNT=$(git log --oneline --since '1 week ago' | wc -l)" | tee -a "$GITHUB_OUTPUT"
+  Tracer-Weekly:  # rebuilds the tracer and benchmark apps, then regenerates traces into the shared nightly-traces directory
+    needs: Last-Commit  # supplies the TRACER_COUNT output tested in the condition below
+    if: |
+      github.repository == 'accel-sim/accel-sim-framework' &&
+      needs.Last-Commit.outputs.TRACER_COUNT > 0
+    runs-on: tgrogers-gpu01
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: dev
+      - name: Setup Environment  # fresh clone of env-setup, switched to its cluster-ubuntu branch
+        run: |
+          rm -rf env-setup
+          git clone git@github.com:purdue-aalp/env-setup.git 
+          cd env-setup 
+          git checkout cluster-ubuntu
+      - name: Build Tracer  # runs install_nvbit.sh, then clean-builds util/tracer_nvbit
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          ./util/tracer_nvbit/install_nvbit.sh
+          make clean -C ./util/tracer_nvbit/
+          make -C ./util/tracer_nvbit/
+      - name: build applications  # only rodinia-3.1 and GPU_Microbenchmark are built; the other suites are commented out
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          export PATH=/home/tgrogers-raid/a/common/python2:$PATH
+          rm -rf ./gpu-app-collection/
+          git clone git@github.com:accel-sim/gpu-app-collection.git
+          source ./gpu-app-collection/src/setup_environment
+          ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/
+          make -j8 -C ./gpu-app-collection/src rodinia-3.1
+          make -j8 -C ./gpu-app-collection/src GPU_Microbenchmark
+          # make -j8 -C ./gpu-app-collection/src Deepbench_nvidia
+          # make -j8 -C ./gpu-app-collection/src parboil
+          # make -j8 -C ./gpu-app-collection/src polybench
+          # make -j8 -C ./gpu-app-collection/src cutlass
+      - name: generate traces  # NOTE(review): wipes and recreates for-sharing/$USER/nightly-traces, linked as ./hw_run; assumes $USER is set on the runner - confirm
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          source ./gpu-app-collection/src/setup_environment
+          rm -rf ./hw_run/
+          rm -rf /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
+          mkdir -p /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
+          ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces ./hw_run
+          ./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark -D 7
+          # ./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -D 7
+  SASS-Weekly:  # builds Accel-Sim and simulates the shared weekly traces with the QV100-SASS-5B_INSN config
+    needs: [Last-Commit, Tracer-Weekly]  # !cancelled() below still runs this when Tracer-Weekly was skipped, but honours a cancel
+    if: |
+      github.repository == 'accel-sim/accel-sim-framework' &&
+      (needs.Last-Commit.outputs.ACCEL_SIM_COUNT > 0 ||
+      needs.Last-Commit.outputs.GPGPUSIM_COUNT > 0) &&
+      needs.Tracer-Weekly.result != 'failure' &&
+      !cancelled()
+    runs-on: tgrogers-raid
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: dev
+      - name: Setup Environment  # fresh clone of env-setup, switched to its cluster-ubuntu branch
+        run: |
+          rm -rf env-setup
+          git clone git@github.com:purdue-aalp/env-setup.git
+          cd env-setup
+          git checkout cluster-ubuntu
+      - name: Build Accel-Sim  # deletes gpu-simulator/gpgpu-sim, sources the setup script, then clean-builds gpu-simulator
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          rm -rf ./gpu-simulator/gpgpu-sim
+          source ./gpu-simulator/setup_environment.sh
+          make clean -C gpu-simulator
+          make -j -C gpu-simulator
+      - name: run SASS  # links the nightly-traces directory written by Tracer-Weekly as ./hw_run before simulating
+        run: |
+          source ./env-setup/11.7.0_env_setup.sh
+          source ./gpu-simulator/setup_environment.sh
+          ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces ./hw_run
+          # ./util/job_launching/run_simulations.py -B rodinia-3.1,GPU_Microbenchmark,sdk-4.2-scaled,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -C QV100-SASS-5B_INSN -T ./hw_run/traces/device-7/11.7 -N weekly-$$ -M 70G
+          ./util/job_launching/run_simulations.py -B rodinia-3.1,GPU_Microbenchmark -C QV100-SASS-5B_INSN -T ./hw_run/traces/device-7/11.7 -N weekly-$$ -M 70G
+          ./util/job_launching/monitor_func_test.py -T 12 -S 1800 -I -v -s weekly-stats-per-app.csv -N weekly-$$