Merge branch 'dev' into dev-ci

accel-sim · Jun 22, 2024 · cd3f0a5 · cd3f0a5
2 parents da5ed4c + 61ba35c
commit cd3f0a5
Show file tree

Hide file tree

Showing 13 changed files with 456 additions and 236 deletions.
diff --git a/.gitignore b/.gitignore
@@ -9,3 +9,6 @@ sim_run_*
 gpucomputingsdk_4.2.9_linux.run
 util/job_launching/*.txt
 gpu-simulator/gpgpu-sim
+.vscode/*
+extern
+gpu-simulator/accel_sim.pyi
diff --git a/gpu-simulator/CMakeLists.txt b/gpu-simulator/CMakeLists.txt
@@ -0,0 +1,93 @@
+cmake_minimum_required(VERSION 3.17)
+
+# Declare the project: name, version, description, homepage, and the single
+# language (C++) that CMake has to enable.
+project(
+    Accel-Sim
+    VERSION 1.2.0
+    DESCRIPTION "Accel-Sim"
+    HOMEPAGE_URL "https://github.com/accel-sim/accel-sim-framework"
+    LANGUAGES CXX
+)
+
+# Build every target with C++17 and refuse to fall back to an older standard.
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Map the ACCELSIM_CONFIG environment variable onto a CMake build type.
+# The expansion is quoted so that an unset or empty variable compares as ""
+# instead of disappearing and leaving a malformed if() expression.
+if("$ENV{ACCELSIM_CONFIG}" STREQUAL "debug")
+    set(CMAKE_BUILD_TYPE Debug)
+else()
+    set(CMAKE_BUILD_TYPE Release)
+endif()
+
+# Pick the DEBUG definition and optimization flags for the chosen build type.
+# Quoting keeps the comparison on the literal value of CMAKE_BUILD_TYPE.
+if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+    add_compile_definitions(DEBUG=1)
+    add_compile_options(-Wall -O0 -g3 -fPIC)
+else()
+    add_compile_definitions(DEBUG=0)
+    add_compile_options(-Wall -O3 -g3 -fPIC)
+endif()
+
+# Assemble a build identifier from the current git state and the configure
+# time, then write it into a generated header in the build directory.
+# The git commands run in the source directory so that a build directory
+# located outside the repository still resolves the repository.
+
+# Abbreviated hash of the most recent commit.
+execute_process(
+    COMMAND git log --abbrev-commit -n 1
+    COMMAND head -1
+    COMMAND sed -re "s/commit (.*)/\\1/"
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_COMMIT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+# Line count of the unstaged numstat output; the sed expression appends a "."
+# that separates it from the staged count in the final string.
+execute_process(
+    COMMAND git diff --numstat
+    COMMAND wc
+    COMMAND sed -re "s/^\\s+([0-9]+).*/\\1./"
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_FILES_CHANGED_A
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+# Line count of the staged (--cached) numstat output.
+execute_process(
+    COMMAND git diff --numstat --cached
+    COMMAND wc
+    COMMAND sed -re "s/^\\s+([0-9]+).*/\\1/"
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_FILES_CHANGED
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+# Configure-time timestamp with minute resolution.
+execute_process(
+    COMMAND date --iso-8601=minutes
+    OUTPUT_VARIABLE TIME
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+set(ACCELSIM_BUILD accelsim-commit-${GIT_COMMIT}_modified_${GIT_FILES_CHANGED_A}${GIT_FILES_CHANGED}_${TIME})
+
+# The header ends with a newline, matching what the Makefile's echo produces.
+file(WRITE "${CMAKE_BINARY_DIR}/accelsim_version.h" "const char *g_accelsim_version=\"${ACCELSIM_BUILD}\";\n")
+
+# Pull in the GPGPU-Sim tree (located through the GPGPUSIM_ROOT environment
+# variable), the two local trace libraries, and the pybind11 checkout under
+# extern/.
+add_subdirectory($ENV{GPGPUSIM_ROOT})
+add_subdirectory(trace-driven)
+add_subdirectory(trace-parser)
+add_subdirectory(extern/pybind11)
+
+
+# Header search paths for the targets declared in this directory, in the same
+# order as before: CUDA, GPGPU-Sim, the build directory (generated version
+# header), and the local source directories.
+include_directories(
+    $ENV{CUDA_INSTALL_PATH}/include
+    $ENV{GPGPUSIM_ROOT}/libcuda
+    $ENV{GPGPUSIM_ROOT}/src
+    ${CMAKE_BINARY_DIR}
+    ISA_Def
+    trace-driven
+    trace-parser
+)
+
+
+# Stand-alone simulator executable.
+add_executable(accel-sim.out accel-sim.cc main.cc)
+target_link_libraries(accel-sim.out
+    PUBLIC
+        cuda ptxsim gpgpusim intersim accelwattch entrypoint
+        -lm -lz -lGL -pthread
+        trace-driven trace-parser
+)
+
+# Python extension module built through pybind11 from the same simulator
+# source plus the Python wrapper.
+pybind11_add_module(accel_sim ./accel-sim.cc ./python_wrapper/python_wrapper.cc)
+target_link_libraries(accel_sim
+    PRIVATE
+        cuda ptxsim gpgpusim intersim accelwattch entrypoint
+        trace-driven trace-parser
+)
+
+# Run stubgen on the accel_sim module in every default build; "|| (exit 0)" lets the build continue when stubgen fails or is missing
+add_custom_target(gen_pyi ALL
+    COMMAND $ENV{HOME}/.local/bin/stubgen -m accel_sim -o . || (exit 0)
+    DEPENDS accel_sim  # run only after the accel_sim module target has been built
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}  # "-o ." therefore writes the stub output into the source tree
+    )
+
+
+install(TARGETS accel-sim.out DESTINATION ${CMAKE_SOURCE_DIR}/bin/$ENV{ACCELSIM_CONFIG})  # installs under <source>/bin/<ACCELSIM_CONFIG>
diff --git a/gpu-simulator/Makefile b/gpu-simulator/Makefile
@@ -37,11 +37,15 @@ else
 endif
 
 ifeq ($(DEBUG),1)
-	CXXFLAGS = -Wall -O0 -g3 -fPIC -std=c++11
+	CXXFLAGS = -Wall -O0 -g3 -fPIC -std=c++17
 else
-	CXXFLAGS = -Wall -O3 -g3 -fPIC -std=c++11
+	CXXFLAGS = -Wall -O3 -g3 -fPIC -std=c++17
 endif
 
+CXXFLAGS+=-I./trace-driven -I./trace-parser -I$(GPGPUSIM_ROOT)/libcuda -I$(GPGPUSIM_ROOT)/src -I$(CUDA_INSTALL_PATH)/include -I$(BUILD_DIR)
+
+LIBS+=-L$(GPGPUSIM_ROOT)/lib/$(GPGPUSIM_CONFIG)/ -lcudart -lm -lz -lGL -pthread $(BUILD_DIR)/*.o 
+
 all: $(BIN_DIR)/accel-sim.out
 
 $(BUILD_DIR)/main.makedepend: depend makedirs
@@ -59,11 +63,8 @@ checkenv: makedirs
 		exit 1; \
 	fi
 
-$(BIN_DIR)/accel-sim.out: trace-driven trace-parser gpgpu-sim makedirs $(BUILD_DIR)/main.o version
-	$(CXX) -std=c++0x -o $(BIN_DIR)/accel-sim.out  -L$(GPGPUSIM_ROOT)/lib/$(GPGPUSIM_CONFIG)/ -lcudart -lm -lz -lGL -pthread $(BUILD_DIR)/*.o
-
-$(BUILD_DIR)/main.o: main.cc version
-	$(CXX) $(CXXFLAGS)  -I$(BUILD_DIR) -I./trace-driven -I./trace-parser -I$(GPGPUSIM_ROOT)/libcuda -I$(GPGPUSIM_ROOT)/src -I$(CUDA_INSTALL_PATH)/include -c main.cc -o $(BUILD_DIR)/main.o
+$(BIN_DIR)/accel-sim.out: trace-driven trace-parser gpgpu-sim makedirs version
+	$(CXX) $(CXXFLAGS) $(LIBS) -o $(BIN_DIR)/accel-sim.out accel-sim.cc main.cc 
 
 version:
 	echo "const char *g_accelsim_version=\"$(ACCELSIM_BUILD)\";" > $(BUILD_DIR)/accelsim_version.h

diff --git a/gpu-simulator/accel-sim.cc b/gpu-simulator/accel-sim.cc
@@ -0,0 +1,226 @@
+#include "accel-sim.h"
+#include "accelsim_version.h"
+
+// Builds the simulator from explicit config-file and trace-file paths by
+// synthesizing the command line that the (argc, argv) constructor would
+// otherwise receive.
+//
+// config_file: path passed as the value of "-config".
+// trace_file:  path passed as the value of "-trace".
+accel_sim_framework::accel_sim_framework(std::string config_file,
+                                          std::string trace_file) {
+  std::cout << "Accel-Sim [build " << g_accelsim_version << "]";
+  m_gpgpu_context = new gpgpu_context();
+
+  // mimic argv
+  unsigned argc = 5;
+  const char *argv[] = {"accel-sim.out", "-config", config_file.c_str(),
+                        "-trace", trace_file.c_str()};
+
+  // Store the result in the m_gpgpu_sim member rather than a new local of the
+  // same name: simulation_loop(), cleanup() and simulate() all dereference
+  // the member after construction.
+  m_gpgpu_sim =
+      gpgpu_trace_sim_init_perf_model(argc, argv, m_gpgpu_context, &tconfig);
+  m_gpgpu_sim->init();
+
+  tracer = trace_parser(tconfig.get_traces_filename());
+
+  tconfig.parse_config();
+
+  init();
+}
+
+accel_sim_framework::accel_sim_framework(int argc, const char **argv) {  // builds the simulator from a raw command line
+  std::cout << "Accel-Sim [build " << g_accelsim_version << "]";  // banner; no newline is written here
+  m_gpgpu_context = new gpgpu_context();
+
+  m_gpgpu_sim =  // member handle used by simulation_loop(), cleanup() and simulate()
+      gpgpu_trace_sim_init_perf_model(argc, argv, m_gpgpu_context, &tconfig);
+  m_gpgpu_sim->init();
+
+  tracer = trace_parser(tconfig.get_traces_filename());  // presumably the name set by the option parsing above - TODO confirm
+
+  tconfig.parse_config();
+
+  init();  // not defined in the visible part of this file
+}
+
+// Top-level driver. Each pass refills the kernel window from the command
+// list, launches every not-yet-launched kernel whose stream is idle, runs the
+// simulator until a kernel finishes or the GPU goes inactive, cleans up, and
+// prints timing. The loop ends when all commands are consumed and no kernels
+// remain, or when the cycle/instruction limit is reached.
+void accel_sim_framework::simulation_loop() {
+  for (;;) {
+    const bool commands_pending = commandlist_index < commandlist.size();
+    if (!commands_pending && kernels_info.empty()) break;
+
+    parse_commandlist();
+
+    // Launch all kernels within window that are on a stream that isn't already
+    // running
+    for (auto kernel : kernels_info) {
+      bool stream_busy = false;
+      for (auto stream_id : busy_streams) {
+        if (stream_id == kernel->get_cuda_stream_id()) stream_busy = true;
+      }
+
+      // Same checks, in the same order, as a single short-circuit condition.
+      if (stream_busy) continue;
+      if (!m_gpgpu_sim->can_start_kernel()) continue;
+      if (kernel->was_launched()) continue;
+
+      std::cout << "launching kernel name: " << kernel->get_name()
+                << " uid: " << kernel->get_uid() << std::endl;
+      m_gpgpu_sim->launch(kernel);
+      kernel->set_launched();
+      busy_streams.push_back(kernel->get_cuda_stream_id());
+    }
+
+    const unsigned finished_kernel_uid = simulate();
+
+    // cleanup finished kernel
+    const bool needs_cleanup = finished_kernel_uid ||
+                               m_gpgpu_sim->cycle_insn_cta_max_hit() ||
+                               !m_gpgpu_sim->active();
+    if (needs_cleanup) {
+      cleanup(finished_kernel_uid);
+    }
+
+    if (sim_cycles) {
+      m_gpgpu_sim->update_stats();
+      m_gpgpu_context->print_simulation_time();
+    }
+
+    if (m_gpgpu_sim->cycle_insn_cta_max_hit()) {
+      printf(
+          "GPGPU-Sim: ** break due to reaching the maximum cycles (or "
+          "instructions) **\n");
+      fflush(stdout);
+      break;
+    }
+  }
+}
+
+// Consumes commands from commandlist, starting at commandlist_index, until
+// kernels_info holds window_size kernels or every command has been read.
+// Memcpy commands are forwarded to the performance model immediately; kernel
+// launch commands only have their trace header parsed and are queued in
+// kernels_info. Any other command type terminates the simulation.
+void accel_sim_framework::parse_commandlist() {
+  while (kernels_info.size() < window_size &&
+         commandlist_index < commandlist.size()) {
+    auto &command = commandlist[commandlist_index];
+    if (command.m_type == command_type::cpu_gpu_mem_copy) {
+      size_t addre, Bcount;
+      tracer.parse_memcpy_info(command.command_string, addre, Bcount);
+      std::cout << "launching memcpy command : " << command.command_string
+                << std::endl;
+      m_gpgpu_sim->perf_memcpy_to_gpu(addre, Bcount);
+      commandlist_index++;
+    } else if (command.m_type == command_type::kernel_launch) {
+      // Read trace header info for window_size number of kernels
+      kernel_trace_t *kernel_trace_info =
+          tracer.parse_kernel_info(command.command_string);
+      trace_kernel_info_t *kernel_info = create_kernel_info(
+          kernel_trace_info, m_gpgpu_context, &tconfig, &tracer);
+      kernels_info.push_back(kernel_info);
+      std::cout << "Header info loaded for kernel command : "
+                << command.command_string << std::endl;
+      commandlist_index++;
+    } else {
+      // unsupported commands will fail the simulation
+      std::cerr << "Undefined Command : " << command.command_string
+                << std::endl;
+      assert(0 && "Undefined Command");
+      // With NDEBUG the assert above compiles to nothing; without this the
+      // loop would never advance past the unsupported command.
+      abort();
+    }
+  }
+}
+
+void accel_sim_framework::cleanup(unsigned finished_kernel) {  // frees matching kernels, releases their streams, then prints stats
+  trace_kernel_info_t *k = NULL;
+  for (unsigned j = 0; j < kernels_info.size(); j++) {
+    k = kernels_info.at(j);
+    if (k->get_uid() == finished_kernel ||  // matches the finished uid, or any visited kernel once the limit is hit / the GPU is inactive
+        m_gpgpu_sim->cycle_insn_cta_max_hit() || !m_gpgpu_sim->active()) {
+      for (unsigned int l = 0; l < busy_streams.size(); l++) {
+        if (busy_streams.at(l) == k->get_cuda_stream_id()) {
+          busy_streams.erase(busy_streams.begin() + l);  // drop one entry for this kernel's stream
+          break;
+        }
+      }
+      tracer.kernel_finalizer(k->get_trace_info());
+      delete k->entry();
+      delete k;
+      kernels_info.erase(kernels_info.begin() + j);  // NOTE(review): j still increments after this erase, so the element shifted into slot j is not examined - confirm intended
+      if (!m_gpgpu_sim->cycle_insn_cta_max_hit() && m_gpgpu_sim->active())
+        break;  // normal case: stop after removing the first matching kernel
+    }
+  }
+  assert(k);  // only checks that at least one kernel was visited; k may already have been deleted above
+  m_gpgpu_sim->print_stats();
+}
+
+// Advances the simulator one cycle at a time until it becomes inactive or
+// reports a finished kernel. Returns the uid reported by finished_kernel() on
+// the last iteration, or 0 if no iteration completed.
+unsigned accel_sim_framework::simulate() {
+  unsigned finished_kernel_uid = 0;
+  for (;;) {
+    if (!m_gpgpu_sim->active()) break;
+
+    // performance simulation
+    if (!m_gpgpu_sim->active()) {
+      // NOTE(review): only reachable if active() changes between the two
+      // consecutive calls above - confirm whether this path is still needed.
+      if (m_gpgpu_sim->cycle_insn_cta_max_hit()) {
+        m_gpgpu_context->the_gpgpusim->g_stream_manager
+            ->stop_all_running_kernels();
+        break;
+      }
+    } else {
+      m_gpgpu_sim->cycle();
+      sim_cycles = true;
+      m_gpgpu_sim->deadlock_check();
+    }
+
+    active = m_gpgpu_sim->active();
+    finished_kernel_uid = m_gpgpu_sim->finished_kernel();
+
+    const bool keep_running = active && !finished_kernel_uid;
+    if (!keep_running) break;
+  }
+  return finished_kernel_uid;
+}
+
+// Creates the simulator-side objects for one traced kernel: a
+// trace_function_info carrying the kernel's shared-memory size, register
+// count and name, and a trace_kernel_info_t built from the traced grid and
+// block dimensions. Both are allocated with new; the kernel info is returned.
+trace_kernel_info_t *accel_sim_framework::create_kernel_info(
+    kernel_trace_t *kernel_trace_info, gpgpu_context *m_gpgpu_context,
+    trace_config *config, trace_parser *parser) {
+  gpgpu_ptx_sim_info resource_usage;
+  resource_usage.smem = kernel_trace_info->shmem;
+  resource_usage.regs = kernel_trace_info->nregs;
+
+  dim3 grid_shape(kernel_trace_info->grid_dim_x,
+                  kernel_trace_info->grid_dim_y,
+                  kernel_trace_info->grid_dim_z);
+  dim3 block_shape(kernel_trace_info->tb_dim_x, kernel_trace_info->tb_dim_y,
+                   kernel_trace_info->tb_dim_z);
+
+  trace_function_info *traced_function =
+      new trace_function_info(resource_usage, m_gpgpu_context);
+  traced_function->set_name(kernel_trace_info->kernel_name.c_str());
+
+  return new trace_kernel_info_t(grid_shape, block_shape, traced_function,
+                                 parser, config, kernel_trace_info);
+}
+
+// Registers every option group with a fresh option parser, parses the given
+// command line, and creates the GPU configuration, the trace-driven GPU
+// model, and the stream manager inside m_gpgpu_context.
+//
+// argc, argv:       command line to parse (program name plus options).
+// m_gpgpu_context:  context whose the_gpgpusim fields are filled in here.
+// m_config:         trace configuration that registers its own options.
+// Returns the newly created GPU model (the_gpgpusim->g_the_gpu).
+gpgpu_sim *accel_sim_framework::gpgpu_trace_sim_init_perf_model(
+    int argc, const char *argv[], gpgpu_context *m_gpgpu_context,
+    trace_config *m_config) {
+  srand(1);
+  print_splash();
+
+  option_parser_t opp = option_parser_create();
+
+  m_gpgpu_context->ptx_reg_options(opp);
+  m_gpgpu_context->func_sim->ptx_opcocde_latency_options(opp);
+
+  icnt_reg_options(opp);
+
+  m_gpgpu_context->the_gpgpusim->g_the_gpu_config =
+      new gpgpu_sim_config(m_gpgpu_context);
+  m_gpgpu_context->the_gpgpusim->g_the_gpu_config->reg_options(
+      opp);  // register GPU microarchitecture options
+  m_config->reg_options(opp);
+
+  option_parser_cmdline(opp, argc, argv);  // parse configuration options
+  fprintf(stdout, "GPGPU-Sim: Configuration options:\n\n");
+  option_parser_print(opp, stdout);
+  // Set the Numeric locale to a standard locale where a decimal point is a
+  // "dot" not a "comma" so it does the parsing correctly independent of the
+  // system environment variables.
+  // The call is made outside assert() so it still runs when NDEBUG is
+  // defined and assert() expands to nothing.
+  const char *numeric_locale = setlocale(LC_NUMERIC, "C");
+  assert(numeric_locale && "failed to select the C numeric locale");
+  (void)numeric_locale;  // avoids an unused-variable warning under NDEBUG
+  m_gpgpu_context->the_gpgpusim->g_the_gpu_config->init();
+
+  m_gpgpu_context->the_gpgpusim->g_the_gpu = new trace_gpgpu_sim(
+      *(m_gpgpu_context->the_gpgpusim->g_the_gpu_config), m_gpgpu_context);
+
+  m_gpgpu_context->the_gpgpusim->g_stream_manager =
+      new stream_manager((m_gpgpu_context->the_gpgpusim->g_the_gpu),
+                         m_gpgpu_context->func_sim->g_cuda_launch_blocking);
+
+  m_gpgpu_context->the_gpgpusim->g_simulation_starttime = time((time_t *)NULL);
+
+  return m_gpgpu_context->the_gpgpusim->g_the_gpu;
+}