Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cutlass update #18

Merged
merged 11 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "src/cuda/cutlass-bench/tools/external/googletest"]
path = src/cuda/cutlass-bench/tools/external/googletest
url = https://github.com/google/googletest.git
[submodule "src/cuda/cutlass-bench"]
path = src/cuda/cutlass-bench
url = https://github.com/NVIDIA/cutlass.git
23 changes: 17 additions & 6 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -426,13 +426,24 @@ deeplearning:

cutlass:
mkdir -p $(BINDIR)/$(BINSUBDIR)/
cd ../ && git submodule init && git submodule update
$(SETENV) mkdir -p cuda/cutlass-bench/build && cd cuda/cutlass-bench/build && cmake .. -DUSE_GPGPUSIM=1 -DCUTLASS_NVCC_ARCHS=70 && make cutlass_perf_test
cd cuda/cutlass-bench/build/tools/test/perf && ln -s -f ../../../../binary.sh . && ./binary.sh
cp cuda/cutlass-bench/build/tools/test/perf/cutlass_perf_test $(BINDIR)/$(BINSUBDIR)/
cp cuda/cutlass-bench/build/tools/test/perf/cutlass_perf_test $(BINDIR)/$(BINSUBDIR)/cutlass_perf_test_k1
echo $(BINDIR)
git submodule init && git submodule update
#$(SETENV) mkdir -p cuda/cutlass-bench/build && cd cuda/cutlass-bench/build && cmake .. -DUSE_GPGPUSIM=1 -DCUTLASS_NVCC_ARCHS=70 && make cutlass_profiler -j12
$(SETENV) mkdir -p cuda/cutlass-bench/build && cd cuda/cutlass-bench/build && cmake .. -DUSE_GPGPUSIM=1 -DCUTLASS_NVCC_ARCHS=70
# cd cuda/cutlass-bench/build/tools/profiler && ln -s -f ../../../../binary.sh . && ./binary.sh
echo $(BINDIR)
cp cuda/cutlass-bench/build/tools/profiler/cutlass_profiler $(BINDIR)/$(BINSUBDIR)/cutlass_perf_test_k1
cp $(BINDIR)/$(BINSUBDIR)/cutlass_perf_test_k1 $(BINDIR)/$(BINSUBDIR)/cutlass_perf_test_k2
cp $(BINDIR)/$(BINSUBDIR)/cutlass_perf_test_k1 $(BINDIR)/$(BINSUBDIR)/cutlass_perf_test_k3


cutlass_examples_turing:
mkdir -p $(BINDIR)/$(BINSUBDIR)/
git submodule init && git submodule update
$(SETENV) mkdir -p cuda/cutlass-bench/build && cd cuda/cutlass-bench/build && cmake .. -DUSE_GPGPUSIM=1 -DCUTLASS_NVCC_ARCHS=75 && cd ./examples/09_turing_tensorop_conv2dfprop && make 09_turing_tensorop_conv2dfprop
echo $(BINDIR)
cp cuda/cutlass-bench/build/examples/09_turing_tensorop_conv2dfprop/09_turing_tensorop_conv2dfprop $(BINDIR)/$(BINSUBDIR)/turing_tensorop_conv2dfrop

# Maybe we should use submodules for this - but I have heard a lot of horror stories about these.
# For now - let's just clone if we don't have it and set the SHA we want.
heterosync:
Expand Down Expand Up @@ -484,7 +495,7 @@ clean_heterosync:
rm -rf cuda/heterosync

clean_cutlass:
rm -fr cuda/cutlass-bench/build
rm -rf cuda/cutlass-bench/build

clean_deeplearning:
$(SETENV) make $(MAKE_ARGS) noinline=$(noinline) -C cuda/cudnn/mnist clean
Expand Down
1 change: 1 addition & 0 deletions src/cuda/cutlass-bench
Submodule cutlass-bench added at 47a3eb
Loading