Skip to content

Commit

Permalink
cutlass update (accel-sim#242)
Browse files Browse the repository at this point in the history
* cutlass: Added cutlass 3.0 to gpu-app-collection

This commit contains the changes to the Makefile and the
define-all-apps file.

* Commit changes to cutlass app

* Changes for cutlass in define-all-apps

* Changes to define-power.yml for cutlass 3

* Comment change of cutlass

---------

Co-authored-by: JRPan <25518778+JRPan@users.noreply.github.com>
Co-authored-by: Tim Rogers <timrogers@purdue.edu>
Co-authored-by: WilliamMTK <China_Aisa@live.com>
  • Loading branch information
4 people committed Sep 3, 2024
1 parent 0e21d6d commit 086e674
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 46 deletions.
92 changes: 50 additions & 42 deletions util/job_launching/apps/define-all-apps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -511,51 +511,59 @@ cutlass_5_trace:
exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/"
data_dirs: "$GPUAPPS_ROOT/data_dirs/"
execs:
- cutlass_perf_test:
- args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 3G
- args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 3G
- args: --seed=2020 --dist=0 --m=2560 --n=64 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 3G
- args: --seed=2020 --dist=0 --m=2560 --n=128 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 3G
- args: --seed=2020 --dist=0 --m=2560 --n=7000 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 3G
- args: --seed=2020 --dist=0 --m=4096 --n=16 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 5G
- args: --seed=2020 --dist=0 --m=4096 --n=32 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 5G
- args: --seed=2020 --dist=0 --m=4096 --n=64 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 5G
- args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 5G
- args: --seed=2020 --dist=0 --m=4096 --n=7000 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 5G
- args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass
- cutlass_profiler:
#single precision gemm kernels
- args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass
accel-sim-mem: 13G
- args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass
- args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass
accel-sim-mem: 13G
- args: --seed=2020 --dist=0 --m=2560 --n=64 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass
# - args: --seed=2020 --dist=0 --m=2560 --n=64 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --m=2560 --n=128 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --m=2560 --n=1024 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --m=2560 --n=2560 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --m=4096 --n=16 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 16G
# - args: --seed=2020 --dist=0 --m=4096 --n=32 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 16G
# - args: --seed=2020 --dist=0 --m=4096 --n=64 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 16G
# - args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 16G
# - args: --seed=2020 --dist=0 --m=4096 --n=4096 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass
# accel-sim-mem: 20G
#gemm kernels on tensor cores
- args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=16 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass
accel-sim-mem: 13G
- args: --seed=2020 --dist=0 --m=2560 --n=128 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 13G
- args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 13G
- args: --seed=2020 --dist=0 --m=2560 --n=1024 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 13G
- args: --seed=2020 --dist=0 --m=2560 --n=2560 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 13G
- args: --seed=2020 --dist=0 --m=4096 --n=16 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 16G
- args: --seed=2020 --dist=0 --m=4096 --n=32 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 16G
- args: --seed=2020 --dist=0 --m=4096 --n=64 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 16G
- args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 16G
- args: --seed=2020 --dist=0 --m=4096 --n=4096 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 20G
# - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=32 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=64 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=128 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=512 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=1024 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=2056 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=16 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=32 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=64 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=128 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=512 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G
# - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=4096 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass
# accel-sim-mem: 13G

## Not sure how much memory the following apps take - just letting them go with the default

Expand Down
8 changes: 4 additions & 4 deletions util/job_launching/apps/define-power.yml
Original file line number Diff line number Diff line change
Expand Up @@ -206,13 +206,13 @@ cutlass_5_trace_validation:
data_dirs: "$ACCELSIM_ROOT/../util/accelwattch/accelwattch_benchmarks/data_dirs/"
execs:
- cutlass_perf_test_k1:
- args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
- args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --operation=gemm --op_class=tensorop --iterations=5 --providers=cutlass
accel-sim-mem: 5G
- cutlass_perf_test_k2:
- args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
accel-sim-mem: 5G
- args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --operation=gemm --op_class=tensorop --iterations=5 --providers=cutlass
accel-sim-mem: 5G
- cutlass_perf_test_k3:
- args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass
- args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --operation=gemm --op_class=tensorop --iterations=5 --providers=cutlass
accel-sim-mem: 5G

Deepbench_validation:
Expand Down

0 comments on commit 086e674

Please sign in to comment.