Skip to content

Commit

Permalink
[SYCL] Implement tracing for in-memory kernel and program cache (#15925)
Browse files Browse the repository at this point in the history
I'm working on implementing cache eviction and it gets difficult to
debug/write test cases without tracing. So, this PR implements tracing
for in-memory kernel and program cache.
  • Loading branch information
uditagarwal97 authored Nov 1, 2024
1 parent eff2ff2 commit d130196
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 3 deletions.
54 changes: 51 additions & 3 deletions sycl/source/detail/kernel_program_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#pragma once

#include "sycl/exception.hpp"
#include <detail/config.hpp>
#include <detail/kernel_arg_mask.hpp>
#include <detail/platform_impl.hpp>
#include <sycl/detail/common.hpp>
Expand All @@ -19,8 +20,10 @@

#include <atomic>
#include <condition_variable>
#include <iomanip>
#include <mutex>
#include <set>
#include <thread>
#include <type_traits>

#include <boost/unordered/unordered_flat_map.hpp>
Expand Down Expand Up @@ -176,6 +179,42 @@ class KernelProgramCache {

void setContextPtr(const ContextPtr &AContext) { MParentContext = AContext; }

// Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is
// set.
static inline void traceProgram(const std::string &Msg,
const ProgramCacheKeyT &CacheKey) {
if (!SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache())
return;

int ImageId = CacheKey.first.second;
std::stringstream DeviceList;
for (const auto &Device : CacheKey.second)
DeviceList << "0x" << std::setbase(16)
<< reinterpret_cast<uintptr_t>(Device) << ",";

std::string Identifier = "[Key:{imageId = " + std::to_string(ImageId) +
",urDevice = " + DeviceList.str() + "}]: ";

std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id()
<< "][Program Cache]" << Identifier << Msg << std::endl;
}

// Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is
// set.
static inline void traceKernel(const std::string &Msg,
const std::string &KernelName,
bool IsKernelFastCache = false) {
if (!SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache())
return;

std::string Identifier =
"[IsFastCache: " + std::to_string(IsKernelFastCache) +
"][Key:{Name = " + KernelName + "}]: ";

std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id()
<< "][Kernel Cache]" << Identifier << Msg << std::endl;
}

Locked<ProgramCache> acquireCachedPrograms() {
return {MCachedPrograms, MProgramCacheMutex};
}
Expand All @@ -195,7 +234,9 @@ class KernelProgramCache {
CommonProgramKeyT CommonKey =
std::make_pair(CacheKey.first.second, CacheKey.second);
ProgCache.KeyMap.emplace(CommonKey, CacheKey);
}
traceProgram("Program inserted.", CacheKey);
} else
traceProgram("Program fetched.", CacheKey);
return std::make_pair(It->second, DidInsert);
}

Expand All @@ -217,7 +258,9 @@ class KernelProgramCache {
CommonProgramKeyT CommonKey =
std::make_pair(CacheKey.first.second, CacheKey.second);
ProgCache.KeyMap.emplace(CommonKey, CacheKey);
}
traceProgram("Program inserted.", CacheKey);
} else
traceProgram("Program fetched.", CacheKey);
return DidInsert;
}

Expand All @@ -227,8 +270,11 @@ class KernelProgramCache {
auto LockedCache = acquireKernelsPerProgramCache();
auto &Cache = LockedCache.get()[Program];
auto [It, DidInsert] = Cache.try_emplace(KernelName, nullptr);
if (DidInsert)
if (DidInsert) {
It->second = std::make_shared<KernelBuildResult>(getAdapter());
traceKernel("Kernel inserted.", KernelName);
} else
traceKernel("Kernel fetched.", KernelName);
return std::make_pair(It->second, DidInsert);
}

Expand All @@ -237,6 +283,7 @@ class KernelProgramCache {
std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
auto It = MKernelFastCache.find(CacheKey);
if (It != MKernelFastCache.end()) {
traceKernel("Kernel fetched.", std::get<3>(CacheKey), true);
return It->second;
}
return std::make_tuple(nullptr, nullptr, nullptr, nullptr);
Expand All @@ -247,6 +294,7 @@ class KernelProgramCache {
std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
// if no insertion took place, thus some other thread has already inserted
// smth in the cache
traceKernel("Kernel inserted.", std::get<3>(CacheKey), true);
MKernelFastCache.emplace(CacheKey, CacheVal);
}

Expand Down
59 changes: 59 additions & 0 deletions sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Tests tracing of in-memory kernel and program cache.

// RUN: %{build} -o %t.out

// There should be no tracing output when SYCL_CACHE_IN_MEM is not set
// or SYCL_CACHE_TRACE is set to 0.

// RUN: env SYCL_CACHE_IN_MEM=0 %{run} %t.out 2> %t.trace1
// RUN: FileCheck --allow-empty --input-file=%t.trace1 --implicit-check-not "In-Memory Cache" %s
// RUN: env SYCL_CACHE_TRACE=0 %{run} %t.out 2> %t.trace2
// RUN: FileCheck --allow-empty --input-file=%t.trace2 --implicit-check-not "In-Memory Cache" %s

// RUN: env SYCL_CACHE_TRACE=2 %{run} %t.out 2> %t.trace3
// RUN: FileCheck %s --input-file=%t.trace3 --check-prefix=CHECK-CACHE-TRACE

#include <sycl/detail/core.hpp>

#include <sycl/specialization_id.hpp>
#include <sycl/usm.hpp>

using namespace sycl;

constexpr specialization_id<int> spec_id;

int main() {
queue q;

// Check program insertion into cache and kernel insertion into fast and
// regular kernel cache.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Program Cache][Key:{{.*}}]: Program inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 0][Key:{Name = [[KERNELNAME1:.*]]]: Kernel inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 1][Key:{Name = [[KERNELNAME1]]]: Kernel inserted.

// In the 2nd and 3rd invocation of this loop, the kernel should be fetched
// from fast kernel cache.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 1][Key:{Name = [[KERNELNAME1]]]: Kernel fetched.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 1][Key:{Name = [[KERNELNAME1]]]: Kernel fetched.
for (int i = 0; i < 3; i++)
q.single_task([] {}).wait();

auto *p = malloc_device<int>(1, q);

// Check program and kernel insertion into cache. There should be different
// programs for different iterations of this loop, because of the different
// specialization constants.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Program Cache][Key:{{.*}}]: Program inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 0][Key:{Name = [[KERNELNAME2:.*]]]: Kernel inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Program Cache][Key:{{.*}}]: Program inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 0][Key:{Name = [[KERNELNAME2]]]: Kernel inserted.
for (int i = 0; i < 2; ++i)
q.submit([&](handler &cgh) {
cgh.set_specialization_constant<spec_id>(i);
cgh.parallel_for(1, [=](auto, kernel_handler kh) {
*p = kh.get_specialization_constant<spec_id>();
});
}).wait();

free(p, q);
}

0 comments on commit d130196

Please sign in to comment.