Skip to content

Commit

Permalink
[WebGPU] Handle device OOM in createBuffer (#17005)
Browse files Browse the repository at this point in the history
  • Loading branch information
CharlieFRuan authored May 17, 2024
1 parent f044eef commit afb6416
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 3 deletions.
15 changes: 15 additions & 0 deletions web/src/runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,7 @@ export class Instance implements Disposable {
private asyncifyHandler: AsyncifyHandler;
private initProgressCallback: Array<InitProgressCallback> = [];
private rng: LinearCongruentialGenerator;
private deviceLostIsError = true; // whether device.lost is due to actual error or dispose()

/**
* Internal function(registered by the runtime)
Expand Down Expand Up @@ -1107,11 +1108,14 @@ export class Instance implements Disposable {
}

/**
 * Release the resources held by this instance.
 *
 * Clears `deviceLostIsError` first so that a `device.lost` notification caused by
 * this teardown is not reported as an error by the `device.lost` handler.
 */
dispose(): void {
  // Keep disposal from being reported as a device.lost error.
  this.deviceLostIsError = false;
  // order matters
  // ctx release goes back into lib.
  this.ctx.dispose();
  this.lib.dispose();
  // Do NOT set deviceLostIsError back to true here: `device.lost` is a promise, so its
  // handler only runs after this method has returned. Restoring the flag synchronously
  // would make the handler always observe `true`, log a spurious "Device lost" error and
  // call dispose() a second time.
  // NOTE(review): presumably the instance is not reused after dispose(); if it can be
  // re-initialized, the flag should be re-armed at that point instead — confirm.
}

/**
* Obtain the runtime information in readable format.
*/
Expand Down Expand Up @@ -2094,6 +2098,17 @@ export class Instance implements Disposable {
* @param device The given GPU device.
*/
initWebGPU(device: GPUDevice): void {
device.addEventListener("uncapturederror", (event) => {
console.error("A WebGPU error was not captured: ", event);
});

device.lost.then((info: any) => {
if (this.deviceLostIsError) {
console.error("Device lost, calling Instance.dispose(). Please initialize again. ", info);
this.dispose();
}
});

const webGPUContext = new WebGPUContext(
this.memory, device
);
Expand Down
29 changes: 26 additions & 3 deletions web/src/webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,29 @@ export async function detectGPUDevice(): Promise<GPUDeviceDetectOutput | undefin
}
}

/**
 * Create a GPU buffer with `createBuffer()` while capturing any error the call raises.
 *
 * The call is wrapped in one error scope per filter ("out-of-memory", "validation",
 * "internal"). The scopes resolve asynchronously, so the buffer is returned right away;
 * if any scope later reports an error, the error is logged and the whole device is
 * destroyed (the returned buffer is not separately destroyed here).
 *
 * @param device The GPUDevice used to create a buffer.
 * @param descriptor The GPUBufferDescriptor passed to `createBuffer()`.
 * @returns The buffer created by `createBuffer()`.
 *
 * @note Any error raised by `createBuffer()` is treated as fatal. Callers are expected to
 * react to the resulting `device.destroy()` through `device.lost.then()`.
 */
function tryCreateBuffer(device: GPUDevice, descriptor: GPUBufferDescriptor) {
  const errorFilters = ["out-of-memory", "validation", "internal"] as const;
  for (const filter of errorFilters) {
    device.pushErrorScope(filter);
  }

  const buffer = device.createBuffer(descriptor);

  // One pop per pushed scope. Error scopes behave as a stack, but every filter gets the
  // same handling, so it does not matter which pop corresponds to which filter.
  for (let i = 0; i < errorFilters.length; i++) {
    void device
      .popErrorScope()
      .then((error) => {
        if (error) {
          device.destroy();
          console.error(error);
        }
      })
      .catch((reason: unknown) => {
        // A rejected pop must not surface as an unhandled promise rejection.
        console.error("Failed to pop WebGPU error scope: ", reason);
      });
  }

  return buffer;
}

const canvasRenderWGSL = `
@group(0) @binding(0) var my_sampler : sampler;
@group(0) @binding(1) var my_texture : texture_2d<f32>;
Expand Down Expand Up @@ -504,7 +527,7 @@ export class WebGPUContext {

if (buffer == undefined) {
// create uniform buffer
buffer = this.device.createBuffer({
buffer = tryCreateBuffer(this.device, {
size: allocSize,
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
});
Expand Down Expand Up @@ -779,7 +802,7 @@ export class WebGPUContext {
if (nbytes == 0) {
nbytes = 1;
}
const buffer = this.device.createBuffer({
const buffer = tryCreateBuffer(this.device, {
size: nbytes,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
});
Expand Down Expand Up @@ -833,7 +856,7 @@ export class WebGPUContext {
nbytes: number
): void {
// Perhaps it would be more useful to reuse a staging buffer?
const gpuTemp = this.device.createBuffer({
const gpuTemp = tryCreateBuffer(this.device, {
size: nbytes,
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
});
Expand Down

0 comments on commit afb6416

Please sign in to comment.