Skip to content

Commit

Permalink
runtime: prefix the world too
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugobros3 committed Oct 14, 2024
1 parent 1242fe1 commit 9859232
Show file tree
Hide file tree
Showing 19 changed files with 182 additions and 180 deletions.
38 changes: 19 additions & 19 deletions include/shady/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,44 +11,44 @@ typedef struct {
bool allow_no_devices;
} RuntimeConfig;

RuntimeConfig default_runtime_config();
void cli_parse_runtime_config(RuntimeConfig* config, int* pargc, char** argv);
RuntimeConfig shd_rt_default_config();
void shd_rt_cli_parse_runtime_config(RuntimeConfig* config, int* pargc, char** argv);

typedef struct Runtime_ Runtime;
typedef struct Device_ Device;
typedef struct Program_ Program;
typedef struct Command_ Command;
typedef struct Buffer_ Buffer;

Runtime* initialize_runtime(RuntimeConfig config);
void shutdown_runtime(Runtime*);
Runtime* shd_rt_initialize(RuntimeConfig config);
void shd_rt_shutdown(Runtime* runtime);

size_t device_count(Runtime*);
Device* get_device(Runtime*, size_t i);
Device* get_an_device(Runtime*);
const char* get_device_name(Device*);
size_t shd_rt_device_count(Runtime* r);
Device* shd_rt_get_device(Runtime* r, size_t i);
Device* shd_rt_get_an_device(Runtime* r);
const char* shd_rt_get_device_name(Device* d);

typedef struct CompilerConfig_ CompilerConfig;
typedef struct Module_ Module;

Program* new_program_from_module(Runtime*, const CompilerConfig*, Module*);
Program* shd_rt_new_program_from_module(Runtime* runtime, const CompilerConfig* base_config, Module* mod);

// Optional extra parameters for a kernel launch; callers may pass NULL instead
// of a pointer to this struct (both samples do so for at least one launch).
typedef struct {
// Output slot for a GPU timing measurement.
// NOTE(review): presumably written by the backend in nanoseconds — the aobench
// sample divides the value by 1000 and prints it as microseconds. Confirm
// against the backend implementations.
uint64_t* profiled_gpu_time;
} ExtraKernelOptions;

Command* launch_kernel(Program*, Device*, const char* entry_point, int dimx, int dimy, int dimz, int args_count, void** args, ExtraKernelOptions*);
bool wait_completion(Command*);
Command* shd_rt_launch_kernel(Program* p, Device* d, const char* entry_point, int dimx, int dimy, int dimz, int args_count, void** args, ExtraKernelOptions* extra_options);
bool shd_rt_wait_completion(Command* cmd);

Buffer* allocate_buffer_device(Device*, size_t);
bool can_import_host_memory(Device*);
Buffer* import_buffer_host(Device*, void*, size_t);
void destroy_buffer(Buffer*);
Buffer* shd_rt_allocate_buffer_device(Device* device, size_t bytes);
bool shd_rt_can_import_host_memory(Device* device);
Buffer* shd_rt_import_buffer_host(Device* device, void* ptr, size_t bytes);
void shd_rt_destroy_buffer(Buffer* buf);

void* get_buffer_host_pointer(Buffer* buf);
uint64_t get_buffer_device_pointer(Buffer* buf);
void* shd_rt_get_buffer_host_pointer(Buffer* buf);
uint64_t shd_rt_get_buffer_device_pointer(Buffer* buf);

bool copy_to_buffer(Buffer* dst, size_t buffer_offset, void* src, size_t size);
bool copy_from_buffer(Buffer* src, size_t buffer_offset, void* dst, size_t size);
bool shd_rt_copy_to_buffer(Buffer* dst, size_t buffer_offset, void* src, size_t size);
bool shd_rt_copy_from_buffer(Buffer* src, size_t buffer_offset, void* dst, size_t size);

#endif
26 changes: 13 additions & 13 deletions samples/aobench/ao_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,58 +98,58 @@ void render_device(Args* args, TEXEL_T *img, int w, int h, int nsubsamples, Stri

shd_info_print("Shady checkerboard test starting...\n");

Runtime* runtime = initialize_runtime(args->runtime_config);
Device* device = get_device(runtime, args->common_app_args.device);
Runtime* runtime = shd_rt_initialize(args->runtime_config);
Device* device = shd_rt_get_device(runtime, args->common_app_args.device);
assert(device);

img[0] = 69;
shd_info_print("malloc'd address is: %zu\n", (size_t) img);

Buffer* buf;
if (import_memory)
buf = import_buffer_host(device, img, sizeof(*img) * WIDTH * HEIGHT * 3);
buf = shd_rt_import_buffer_host(device, img, sizeof(*img) * WIDTH * HEIGHT * 3);
else
buf = allocate_buffer_device(device, sizeof(*img) * WIDTH * HEIGHT * 3);
buf = shd_rt_allocate_buffer_device(device, sizeof(*img) * WIDTH * HEIGHT * 3);

uint64_t buf_addr = get_buffer_device_pointer(buf);
uint64_t buf_addr = shd_rt_get_buffer_device_pointer(buf);

shd_info_print("Device-side address is: %zu\n", buf_addr);

Module* m;
CHECK(shd_driver_load_source_file_from_filename(&args->compiler_config, path, "aobench", &m) == NoError, return);
Program* program = new_program_from_module(runtime, &args->compiler_config, m);
Program* program = shd_rt_new_program_from_module(runtime, &args->compiler_config, m);

// run it twice to compile everything and benefit from caches
wait_completion(launch_kernel(program, device, "aobench_kernel", WIDTH / BLOCK_SIZE, HEIGHT / BLOCK_SIZE, 1, 1, (void*[]) { &buf_addr }, NULL));
shd_rt_wait_completion(shd_rt_launch_kernel(program, device, "aobench_kernel", WIDTH / BLOCK_SIZE, HEIGHT / BLOCK_SIZE, 1, 1, (void* []) { &buf_addr }, NULL));
uint64_t tsn = shd_get_time_nano();
uint64_t profiled_gpu_time = 0;
ExtraKernelOptions extra_kernel_options = {
.profiled_gpu_time = &profiled_gpu_time
};
wait_completion(launch_kernel(program, device, "aobench_kernel", WIDTH / BLOCK_SIZE, HEIGHT / BLOCK_SIZE, 1, 1, (void*[]) { &buf_addr }, &extra_kernel_options));
shd_rt_wait_completion(shd_rt_launch_kernel(program, device, "aobench_kernel", WIDTH / BLOCK_SIZE, HEIGHT / BLOCK_SIZE, 1, 1, (void* []) { &buf_addr }, &extra_kernel_options));
uint64_t tpn = shd_get_time_nano();
shd_info_print("device rendering took %dus (gpu time: %dus)\n", (tpn - tsn) / 1000, profiled_gpu_time / 1000);

if (!import_memory)
copy_from_buffer(buf, 0, img, sizeof(*img) * WIDTH * HEIGHT * 3);
shd_rt_copy_from_buffer(buf, 0, img, sizeof(*img) * WIDTH * HEIGHT * 3);
shd_debug_print("data %d\n", (int) img[0]);
destroy_buffer(buf);
shd_rt_destroy_buffer(buf);

shutdown_runtime(runtime);
shd_rt_shutdown(runtime);
}

int main(int argc, char **argv) {
shd_log_set_level(INFO);
Args args = {
.compiler_config = shd_default_compiler_config(),
.runtime_config = default_runtime_config(),
.runtime_config = shd_rt_default_config(),
};

args.compiler_config.input_cf.restructure_with_heuristics = true;

shd_parse_common_args(&argc, argv);
shd_parse_compiler_config_args(&args.compiler_config, &argc, argv);
cli_parse_runtime_config(&args.runtime_config, &argc, argv);
shd_rt_cli_parse_runtime_config(&args.runtime_config, &argc, argv);
cli_parse_common_app_arguments(&args.common_app_args, &argc, argv);

bool do_host = false, do_ispc = false, do_device = false, do_all = true;
Expand Down
24 changes: 12 additions & 12 deletions samples/checkerboard/checkerboard.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,25 +44,25 @@ int main(int argc, char **argv)
shd_log_set_level(INFO);
CompilerConfig compiler_config = shd_default_compiler_config();

RuntimeConfig runtime_config = default_runtime_config();
RuntimeConfig runtime_config = shd_rt_default_config();

shd_parse_common_args(&argc, argv);
shd_parse_compiler_config_args(&compiler_config, &argc, argv);
cli_parse_runtime_config(&runtime_config, &argc, argv);
shd_rt_cli_parse_runtime_config(&runtime_config, &argc, argv);

shd_info_print("Shady checkerboard test starting...\n");

Runtime* runtime = initialize_runtime(runtime_config);
Device* device = get_device(runtime, 0);
Runtime* runtime = shd_rt_initialize(runtime_config);
Device* device = shd_rt_get_device(runtime, 0);
assert(device);

img[0] = 69;
shd_info_print("malloc'd address is: %zu\n", (size_t) img);

int buf_size = sizeof(uint8_t) * WIDTH * HEIGHT * 3;
Buffer* buf = allocate_buffer_device(device, buf_size);
copy_to_buffer(buf, 0, img, buf_size);
uint64_t buf_addr = get_buffer_device_pointer(buf);
Buffer* buf = shd_rt_allocate_buffer_device(device, buf_size);
shd_rt_copy_to_buffer(buf, 0, img, buf_size);
uint64_t buf_addr = shd_rt_get_buffer_device_pointer(buf);

shd_info_print("Device-side address is: %zu\n", buf_addr);

Expand All @@ -72,16 +72,16 @@ int main(int argc, char **argv)
if (shd_driver_load_source_file(&compiler_config, SrcSlim, sizeof(checkerboard_kernel_src), checkerboard_kernel_src,
"checkerboard", &m) != NoError)
shd_error("Failed to load checkerboard module");
Program* program = new_program_from_module(runtime, &compiler_config, m);
Program* program = shd_rt_new_program_from_module(runtime, &compiler_config, m);

wait_completion(launch_kernel(program, device, "checkerboard", 16, 16, 1, 1, (void*[]) { &buf_addr }, NULL));
shd_rt_wait_completion(shd_rt_launch_kernel(program, device, "checkerboard", 16, 16, 1, 1, (void* []) { &buf_addr }, NULL));

copy_from_buffer(buf, 0, img, buf_size);
shd_rt_copy_from_buffer(buf, 0, img, buf_size);
shd_info_print("data %d\n", (int) img[0]);

destroy_buffer(buf);
shd_rt_destroy_buffer(buf);

shutdown_runtime(runtime);
shd_rt_shutdown(runtime);
saveppm("checkerboard.ppm", WIDTH, HEIGHT, img);
shd_destroy_ir_arena(a);
free(img);
Expand Down
10 changes: 5 additions & 5 deletions src/runtime/cuda/cuda_runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ bool cuda_command_wait(CudaCommand* command) {
return true;
}

CudaCommand* shd_cuda_launch_kernel(CudaDevice* device, Program* p, String entry_point, int dimx, int dimy, int dimz, int args_count, void** args, ExtraKernelOptions* options) {
static CudaCommand* shd_cuda_launch_kernel(CudaDevice* device, Program* p, String entry_point, int dimx, int dimy, int dimz, int args_count, void** args, ExtraKernelOptions* options) {
CudaKernel* kernel = shd_cuda_get_specialized_program(device, p, entry_point);

CudaCommand* cmd = calloc(sizeof(CudaCommand), 1);
Expand Down Expand Up @@ -76,9 +76,9 @@ static CudaDevice* create_cuda_device(CudaBackend* b, int ordinal) {
.base = {
.get_name = (const char*(*)(Device*)) cuda_device_get_name,
.cleanup = (void(*)(Device*)) cuda_device_cleanup,
.allocate_buffer = (Buffer* (*)(Device*, size_t)) shd_cuda_allocate_buffer,
.can_import_host_memory = (bool (*)(Device*)) shd_cuda_can_import_host_memory,
.import_host_memory_as_buffer = (Buffer* (*)(Device*, void*, size_t)) shd_cuda_import_host_memory,
.allocate_buffer = (Buffer* (*)(Device*, size_t)) shd_rt_cuda_allocate_buffer,
.can_import_host_memory = (bool (*)(Device*)) shd_rt_cuda_can_import_host_memory,
.import_host_memory_as_buffer = (Buffer* (*)(Device*, void*, size_t)) shd_rt_cuda_import_host_memory,
.launch_kernel = (Command*(*)(Device*, Program*, String, int, int, int, int, void**, ExtraKernelOptions*)) shd_cuda_launch_kernel,
},
.handle = handle,
Expand Down Expand Up @@ -108,7 +108,7 @@ static bool probe_cuda_devices(CudaBackend* b) {
return true;
}

Backend* initialize_cuda_backend(Runtime* base) {
Backend* shd_rt_initialize_cuda_backend(Runtime* base) {
CudaBackend* backend = malloc(sizeof(CudaBackend));
memset(backend, 0, sizeof(CudaBackend));
backend->base = (Backend) {
Expand Down
6 changes: 3 additions & 3 deletions src/runtime/cuda/cuda_runtime_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static CudaBuffer* new_buffer_common(size_t size) {
return buffer;
}

CudaBuffer* shd_cuda_allocate_buffer(CudaDevice* device, size_t size) {
CudaBuffer* shd_rt_cuda_allocate_buffer(CudaDevice* device, size_t size) {
CUdeviceptr device_ptr;
CHECK_CUDA(cuMemAlloc(&device_ptr, size), return NULL);
CudaBuffer* buffer = new_buffer_common(size);
Expand All @@ -56,7 +56,7 @@ CudaBuffer* shd_cuda_allocate_buffer(CudaDevice* device, size_t size) {
return buffer;
}

CudaBuffer* shd_cuda_import_host_memory(CudaDevice* device, void* host_ptr, size_t size) {
CudaBuffer* shd_rt_cuda_import_host_memory(CudaDevice* device, void* host_ptr, size_t size) {
CUdeviceptr device_ptr;
CHECK_CUDA(cuMemHostRegister(host_ptr, size, CU_MEMHOSTREGISTER_DEVICEMAP), return NULL);
CHECK_CUDA(cuMemHostGetDevicePointer(&device_ptr, host_ptr, 0), return NULL);
Expand All @@ -68,6 +68,6 @@ CudaBuffer* shd_cuda_import_host_memory(CudaDevice* device, void* host_ptr, size
return buffer;
}

// Reports whether host memory can be imported as a buffer on this CUDA device.
// Unconditionally returns true; the device argument `d` is not inspected.
bool shd_cuda_can_import_host_memory(CudaDevice* d) {
bool shd_rt_cuda_can_import_host_memory(CudaDevice* d) {
return true;
}
10 changes: 5 additions & 5 deletions src/runtime/cuda/cuda_runtime_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ typedef struct {
CUfunction entry_point_function;
} CudaKernel;

CudaBuffer* shd_cuda_allocate_buffer(CudaDevice*, size_t size);
CudaBuffer* shd_cuda_import_host_memory(CudaDevice*, void* host_ptr, size_t size);
bool shd_cuda_can_import_host_memory(CudaDevice*);
CudaBuffer* shd_rt_cuda_allocate_buffer(CudaDevice*, size_t size);
CudaBuffer* shd_rt_cuda_import_host_memory(CudaDevice*, void* host_ptr, size_t size);
bool shd_rt_cuda_can_import_host_memory(CudaDevice*);

CudaKernel* shd_cuda_get_specialized_program(CudaDevice*, Program*, String ep);
bool shd_cuda_destroy_specialized_kernel(CudaKernel*);
CudaKernel* shd_rt_cuda_get_specialized_program(CudaDevice*, Program*, String ep);
bool shd_rt_cuda_destroy_specialized_kernel(CudaKernel*);

#endif
4 changes: 2 additions & 2 deletions src/runtime/cuda/cuda_runtime_program.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ static CudaKernel* create_specialized_program(CudaDevice* device, SpecProgramKey
return kernel;
}

CudaKernel* shd_cuda_get_specialized_program(CudaDevice* device, Program* program, String entry_point) {
CudaKernel* shd_rt_cuda_get_specialized_program(CudaDevice* device, Program* program, String entry_point) {
SpecProgramKey key = { .base = program, .entry_point = entry_point };
CudaKernel** found = find_value_dict(SpecProgramKey, CudaKernel*, device->specialized_programs, key);
if (found)
Expand All @@ -157,7 +157,7 @@ CudaKernel* shd_cuda_get_specialized_program(CudaDevice* device, Program* progra
return spec;
}

bool shd_cuda_destroy_specialized_kernel(CudaKernel* kernel) {
bool shd_rt_cuda_destroy_specialized_kernel(CudaKernel* kernel) {
free(kernel->cuda_code);
free(kernel->ptx);
CHECK_CUDA(cuModuleUnload(kernel->cuda_module), return false);
Expand Down
49 changes: 22 additions & 27 deletions src/runtime/runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <string.h>
#include <assert.h>

Runtime* initialize_runtime(RuntimeConfig config) {
Runtime* shd_rt_initialize(RuntimeConfig config) {
Runtime* runtime = malloc(sizeof(Runtime));
memset(runtime, 0, sizeof(Runtime));
runtime->config = config;
Expand All @@ -15,12 +15,12 @@ Runtime* initialize_runtime(RuntimeConfig config) {
runtime->programs = shd_new_list(Program*);

#if VK_BACKEND_PRESENT
Backend* vk_backend = initialize_vk_backend(runtime);
Backend* vk_backend = shd_rt_initialize_vk_backend(runtime);
CHECK(vk_backend, goto init_fail_free);
shd_list_append(Backend*, runtime->backends, vk_backend);
#endif
#if CUDA_BACKEND_PRESENT
Backend* cuda_backend = initialize_cuda_backend(runtime);
Backend* cuda_backend = shd_rt_initialize_cuda_backend(runtime);
CHECK(cuda_backend, goto init_fail_free);
append_list(Backend*, runtime->backends, cuda_backend);
#endif
Expand All @@ -34,7 +34,7 @@ Runtime* initialize_runtime(RuntimeConfig config) {
return NULL;
}

void shutdown_runtime(Runtime* runtime) {
void shd_rt_shutdown(Runtime* runtime) {
if (!runtime) return;

// TODO force wait outstanding dispatches ?
Expand All @@ -45,7 +45,7 @@ void shutdown_runtime(Runtime* runtime) {
shd_destroy_list(runtime->devices);

for (size_t i = 0; i < shd_list_count(runtime->programs); i++) {
unload_program(shd_read_list(Program*, runtime->programs)[i]);
shd_rt_unload_program(shd_read_list(Program*, runtime->programs)[i]);
}
shd_destroy_list(runtime->programs);

Expand All @@ -56,44 +56,39 @@ void shutdown_runtime(Runtime* runtime) {
free(runtime);
}

// Returns the number of entries in the runtime's device list (r->devices).
size_t device_count(Runtime* r) {
size_t shd_rt_device_count(Runtime* r) {
return shd_list_count(r->devices);
}

// Returns the device stored at index `i` of the runtime's device list.
// The index is validated only by assert(), so an out-of-range `i` is not
// caught when assertions are compiled out (NDEBUG).
Device* get_device(Runtime* r, size_t i) {
assert(i < device_count(r));
Device* shd_rt_get_device(Runtime* r, size_t i) {
assert(i < shd_rt_device_count(r));
return shd_read_list(Device*, r->devices)[i];
}

// Convenience accessor: returns the first device (index 0) of the runtime.
// Asserts that the runtime has at least one device.
Device* get_an_device(Runtime* r) {
assert(device_count(r) > 0);
return get_device(r, 0);
Device* shd_rt_get_an_device(Runtime* r) {
assert(shd_rt_device_count(r) > 0);
return shd_rt_get_device(r, 0);
}

// Virtual functions: thin wrappers that dispatch through the function pointers stored on Device, Command and Buffer.

// Returns the device's name by calling the device's get_name function pointer.
const char* get_device_name(Device* d) { return d->get_name(d); }
const char* shd_rt_get_device_name(Device* d) { return d->get_name(d); }

// Launches entry point `entry_point` of program `p` on device `d` by forwarding
// every argument to the device's launch_kernel function pointer and returning
// its result. Note the argument order flips: the public API takes
// (program, device, ...) while the device callback takes (device, program, ...).
Command* launch_kernel(Program* p, Device* d, const char* entry_point, int dimx, int dimy, int dimz, int args_count, void** args, ExtraKernelOptions* extra_options) {
Command* shd_rt_launch_kernel(Program* p, Device* d, const char* entry_point, int dimx, int dimy, int dimz, int args_count, void** args, ExtraKernelOptions* extra_options) {
return d->launch_kernel(d, p, entry_point, dimx, dimy, dimz, args_count, args, extra_options);
}

// Calls the command's wait_for_completion function pointer and returns its result.
// `cmd` is dereferenced unconditionally, so it must not be NULL.
bool wait_completion(Command* cmd) { return cmd->wait_for_completion(cmd); }
bool shd_rt_wait_completion(Command* cmd) { return cmd->wait_for_completion(cmd); }

// Returns the result of the device's can_import_host_memory function pointer.
bool can_import_host_memory(Device* device) { return device->can_import_host_memory(device); }
bool shd_rt_can_import_host_memory(Device* device) { return device->can_import_host_memory(device); }

// Buffer creation wrappers; both dispatch through the device's function pointers.
// allocate_buffer_device: forwards `bytes` to device->allocate_buffer.
// import_buffer_host: forwards `ptr` and `bytes` to device->import_host_memory_as_buffer.
Buffer* allocate_buffer_device(Device* device, size_t bytes) { return device->allocate_buffer(device, bytes); }
Buffer* import_buffer_host(Device* device, void* ptr, size_t bytes) { return device->import_host_memory_as_buffer(device, ptr, bytes); }
Buffer* shd_rt_allocate_buffer_device(Device* device, size_t bytes) { return device->allocate_buffer(device, bytes); }
Buffer* shd_rt_import_buffer_host(Device* device, void* ptr, size_t bytes) { return device->import_host_memory_as_buffer(device, ptr, bytes); }

// Destroys a buffer by calling the buffer's own destroy function pointer.
// `buf` is dereferenced unconditionally, so it must not be NULL.
void destroy_buffer(Buffer* buf) { buf->destroy(buf); }
void shd_rt_destroy_buffer(Buffer* buf) { buf->destroy(buf); }

// Address accessors; both dispatch through the buffer's function pointers.
// get_buffer_host_pointer: returns the result of buf->get_host_ptr.
// get_buffer_device_pointer: returns the result of buf->get_device_ptr as a 64-bit integer.
void* get_buffer_host_pointer(Buffer* buf) { return buf->get_host_ptr(buf); }
uint64_t get_buffer_device_pointer(Buffer* buf) { return buf->get_device_ptr(buf); }
void* shd_rt_get_buffer_host_pointer(Buffer* buf) { return buf->get_host_ptr(buf); }
uint64_t shd_rt_get_buffer_device_pointer(Buffer* buf) { return buf->get_device_ptr(buf); }

// Buffer copy wrappers; both return whatever the buffer's function pointer returns.
// copy_to_buffer: forwards (buffer_offset, src, size) to dst->copy_into.
// copy_from_buffer: forwards (buffer_offset, dst, size) to src->copy_from.
bool copy_to_buffer(Buffer* dst, size_t buffer_offset, void* src, size_t size) {
return dst->copy_into(dst, buffer_offset, src, size);
}

bool copy_from_buffer(Buffer* src, size_t buffer_offset, void* dst, size_t size) {
return src->copy_from(src, buffer_offset, dst, size);
}
bool shd_rt_copy_to_buffer(Buffer* dst, size_t buffer_offset, void* src, size_t size) { return dst->copy_into(dst, buffer_offset, src, size); }
bool shd_rt_copy_from_buffer(Buffer* src, size_t buffer_offset, void* dst, size_t size) { return src->copy_from(src, buffer_offset, dst, size); }
Loading

0 comments on commit 9859232

Please sign in to comment.