From bea672966e3da06f1bb27ce5f7fe5b77c4f7c0a7 Mon Sep 17 00:00:00 2001 From: fengwuyao <131706622+fengwuyao@users.noreply.github.com> Date: Thu, 4 May 2023 15:57:38 -0700 Subject: [PATCH 01/32] Webgl backend for bitwise AND API (#7654) * Test commit * Support ensureShape in tfjs * Applied changes * Updates doc * Updates doc * Updates doc * Add an example * add dif length and null shape tests * fix the lint error * Update executor tests * Update the header * Implement bitwiseAnd() ops and add kernel in Webcpu backEnd * Fix format * Change to only support int32 type * add kernel in webgl backend * Update webGL kernels for bitwise AND --- tfjs-backend-webgl/src/kernel_utils/shared.ts | 2 + tfjs-backend-webgl/src/kernels/BitwiseAnd.ts | 73 +++++++++++++++++++ .../src/register_all_kernels.ts | 2 + tfjs-backend-webgl/src/setup_test.ts | 2 +- 4 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 tfjs-backend-webgl/src/kernels/BitwiseAnd.ts diff --git a/tfjs-backend-webgl/src/kernel_utils/shared.ts b/tfjs-backend-webgl/src/kernel_utils/shared.ts index b9a7a5a59f2..327a932a897 100644 --- a/tfjs-backend-webgl/src/kernel_utils/shared.ts +++ b/tfjs-backend-webgl/src/kernel_utils/shared.ts @@ -31,6 +31,7 @@ const { addImpl: addImplCPU, bincountImpl: bincountImplCPU, bincountReduceImpl: bincountReduceImplCPU, + bitwiseAndImpl: bitwiseAndImplCPU, castImpl: castImplCPU, ceilImpl: ceilImplCPU, concatImpl: concatImplCPU, @@ -82,6 +83,7 @@ export { addImplCPU, bincountImplCPU, bincountReduceImplCPU, + bitwiseAndImplCPU, castImplCPU, ceilImplCPU, concatImplCPU, diff --git a/tfjs-backend-webgl/src/kernels/BitwiseAnd.ts b/tfjs-backend-webgl/src/kernels/BitwiseAnd.ts new file mode 100644 index 00000000000..0c72abeb244 --- /dev/null +++ b/tfjs-backend-webgl/src/kernels/BitwiseAnd.ts @@ -0,0 +1,73 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ + +import {BitwiseAnd, BitwiseAndInputs, env, KernelConfig, KernelFunc, TensorInfo, TypedArray} from '@tensorflow/tfjs-core'; +import {MathBackendWebGL} from '../backend_webgl'; +import {BinaryOpProgram} from '../binaryop_gpu'; +import {BinaryOpPackedProgram} from '../binaryop_packed_gpu'; +import {bitwiseAndImplCPU as cpuBitwiseAnd} from '../kernel_utils/shared'; + +export const BITWISEAND = ` + int r = int(a.r) & int(b.r); + int g = int(a.g) & int(b.g); + int rb = int(a.b) & int(b.b); + int ra = int(a.a) & int(b.a); + return vec4(r, g, rb, ra); +`; + +export const BITWISEAND_UNPACKED = ` + return float(int(a.r) & int(b.r)); +`; + +export function bitwiseAnd(args: { + inputs: BitwiseAndInputs, + backend: MathBackendWebGL, +}): TensorInfo { + const {inputs, backend} = args; + const {a, b} = inputs; + const shouldUsePackedProgram = env().getBool('WEBGL_PACK_BINARY_OPERATIONS'); + const versionNumber = env().getNumber('WEBGL_VERSION'); + + // The type of a and b are ensured to be `int32` in core, therefore no need to + // consider other type situations. + if ((backend.shouldExecuteOnCPU([a, b])) || versionNumber === 1) { + const aVals = backend.texData.get(a.dataId).values as TypedArray; + const bVals = backend.texData.get(b.dataId).values as TypedArray; + const [outValues, outShape] = + cpuBitwiseAnd(a.shape, b.shape, aVals, bVals, a.dtype); + + const out = backend.makeTensorInfo(outShape, a.dtype); + const outData = backend.texData.get(out.dataId); + outData.values = outValues; + return out; + } + + let program: BinaryOpProgram|BinaryOpPackedProgram; + if (shouldUsePackedProgram) { + program = new BinaryOpPackedProgram(BITWISEAND, a.shape, b.shape, false); + } else { + program = new BinaryOpProgram(BITWISEAND_UNPACKED, a.shape, b.shape); + } + + return backend.runWebGLProgram(program, [a, b], a.dtype); +} + +export const bitwiseAndConfig: KernelConfig = { + kernelName: BitwiseAnd, + backendName: 'webgl', + kernelFunc: bitwiseAnd as unknown as KernelFunc +}; diff --git a/tfjs-backend-webgl/src/register_all_kernels.ts b/tfjs-backend-webgl/src/register_all_kernels.ts index 9f8e8f4ff39..42b1e0e9ae5 100644 --- a/tfjs-backend-webgl/src/register_all_kernels.ts +++ b/tfjs-backend-webgl/src/register_all_kernels.ts @@ -39,6 +39,7 @@ import {batchMatMulConfig} from './kernels/BatchMatMul'; import {batchNormConfig} from './kernels/BatchNorm'; import {batchToSpaceNDConfig} from './kernels/BatchToSpaceND'; import {bincountConfig} from './kernels/Bincount'; +import {bitwiseAndConfig} from './kernels/BitwiseAnd'; import {broadcastArgsConfig} from './kernels/BroadcastArgs'; import {castConfig} from './kernels/Cast'; import {ceilConfig} from './kernels/Ceil'; @@ -211,6 +212,7 @@ const kernelConfigs: KernelConfig[] = [ batchNormConfig, batchToSpaceNDConfig, bincountConfig, + bitwiseAndConfig, broadcastArgsConfig, castConfig, ceilConfig, diff --git a/tfjs-backend-webgl/src/setup_test.ts b/tfjs-backend-webgl/src/setup_test.ts index a22acd98022..343a8e5e9af 100644 --- a/tfjs-backend-webgl/src/setup_test.ts +++ b/tfjs-backend-webgl/src/setup_test.ts @@ -36,7 +36,7 @@ const customInclude = (testName: string) => { 'isBrowser: false', 'dilation gradient', 'throws when index is out of bound', // otsu tests for threshold op is failing on windows - 'method otsu', 'bitwiseAnd' + 'method otsu' ]; for (const subStr of toExclude) { if (testName.includes(subStr)) { From c9b746f6fbd8356398f1defa8ac7cfc602156791 Mon Sep 17 00:00:00 2001 
From: Xu Xing
Date: Fri, 5 May 2023 10:02:44 +0800
Subject: [PATCH 02/32] [webgpu] Enable parallel compilation (#7191)

FEATURE

* [webgpu] Add parallel compile

Demo code:

  // Parallel compile.
  tf.env().set('WEBGPU_ENGINE_COMPILE_ONLY', true);
  const result1 = predict(model);
  await tf.backend().checkCompileCompletionAsync();
  tf.dispose(result1);

  // Actual inference.
  tf.env().set('WEBGPU_ENGINE_COMPILE_ONLY', false);
  const result2 = predict(model);
  await result2.data();
  tf.dispose(result2);

* Rename flag to WEBGPU_ENGINE_COMPILE_ONLY
---
 tfjs-backend-webgpu/src/backend_webgpu.ts     | 129 ++++++++++-------
 .../src/backend_webgpu_test.ts                | 133 ++++++++++++++++++
 tfjs-backend-webgpu/src/flags_webgpu.ts       |   3 +
 tfjs-backend-webgpu/src/webgpu_program.ts     |  42 ++++--
 4 files changed, 244 insertions(+), 63 deletions(-)

diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts
index fc83a5bf00c..bb9b0c62e9f 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -129,7 +129,8 @@ export class WebGPUBackend extends KernelBackend {
   private dummyContext: GPUCanvasContext;
   private tensorDataPendingDisposal: DataId[] = [];
   private static nextDataId = 0;
-  private pipelineCache: {[key: string]: GPUComputePipeline};
+  private pipelineCache:
+      {[key: string]: GPUComputePipeline|Promise<GPUComputePipeline>};
   private programTimersStack: TimerNode[];
   private querySet: GPUQuerySet;
   private stagingPendingDisposal: BufferInfo[] = [];
@@ -356,8 +357,27 @@ export class WebGPUBackend extends KernelBackend {
     return this.currentComputePass;
   }
 
+  // Check if parallel compilation is done.
+  async checkCompileCompletionAsync() {
+    let pipelines: GPUComputePipeline[];
+    try {
+      pipelines = await Promise.all(Object.values(this.pipelineCache));
+    } catch (e) {
+      // TODO: Add test case to catch this exception.
+      throw new Error(e.message);
+    }
+    Object.keys(this.pipelineCache).map((key, i) => {
+      this.pipelineCache[key] = pipelines[i];
+    });
+  }
+
   public async getBufferData(buffer: GPUBuffer, size: number):
       Promise<ArrayBuffer> {
+    if (env().getBool('WEBGPU_ENGINE_COMPILE_ONLY')) {
+      console.warn(
+          'The data may be invalid since WEBGPU_ENGINE_COMPILE_ONLY is true, this can only be called when WEBGPU_ENGINE_COMPILE_ONLY is false');
+      return null;
+    }
     const staging = this.bufferManager.acquireBuffer(
         size, GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ);
     this.ensureCommandEncoderReady();
@@ -888,6 +908,47 @@ export class WebGPUBackend extends KernelBackend {
     this.uploadToGPU(output.dataId);
     program.dispatch = reshapeDispatch(this.device, program);
 
+    const inputsData = inputs.map((input: TensorInfo, i: number) => {
+      if (input.dtype === 'complex64') {
+        throw new Error(
+            `GPGPUProgram does not support complex64 input. For complex64 ` +
+            `dtypes, please separate the program into real and imaginary ` +
+            `parts.`);
+      }
+      this.uploadToGPU(input.dataId);
+
+      return {
+        // Returning dtype from tensorMap because it reflects dtype
+        // of underlying buffer, rather than abstract dtype.
+ dtype: this.tensorMap.get(input.dataId).dtype, + shape: input.shape, + name: program.variableNames[i] + }; + }); + + program.shaderKey = + webgpu_program.makeShaderKey(program, inputsData, output); + + const parallelCompilation = env().getBool('WEBGPU_ENGINE_COMPILE_ONLY'); + if (!(program.shaderKey in this.pipelineCache)) { + this.pipelineCache[program.shaderKey] = webgpu_program.compileProgram( + this.device, program, inputsData, output, parallelCompilation); + } + program.pipeline = this.pipelineCache[program.shaderKey]; + + if (!parallelCompilation) { + this.recordAndSubmit(program, output, inputs, programDefinedUniform); + } + return output; + } + + private recordAndSubmit( + program: webgpu_program.WebGPUProgram, output: TensorInfo, + inputs: TensorInfo[], programDefinedUniform?: ProgramUniform) { + if (program.pipeline instanceof Promise) { + throw new Error( + 'Please call checkCompileCompletionAsync to ensure parallel compilation is done!'); + } // There are six kinds of uniforms: NAN, INFINITY, shapes, shape strides, // program size, program defined uniforms. let programUniform: ProgramUniform = []; @@ -912,36 +973,6 @@ export class WebGPUBackend extends KernelBackend { } } - const inputsData = inputs.map((input: TensorInfo, i: number) => { - if (input.dtype === 'complex64') { - throw new Error( - `GPGPUProgram does not support complex64 input. For complex64 ` + - `dtypes, please separate the program into real and imaginary ` + - `parts.`); - } - this.uploadToGPU(input.dataId); - - return { - // Returning dtype from tensorMap because it reflects dtype - // of underlying buffer, rather than abstract dtype. - dtype: this.tensorMap.get(input.dataId).dtype, - shape: input.shape, - name: program.variableNames[i] - }; - }); - - const shaderKey = - webgpu_program.makeShaderKey(program, bufferShapes, inputsData, output); - - let pipeline; - if (shaderKey in this.pipelineCache) { - pipeline = this.pipelineCache[shaderKey]; - } else { - pipeline = webgpu_program.compileProgram( - this.device, program, inputsData, output, shaderKey); - this.pipelineCache[shaderKey] = pipeline; - } - if (programDefinedUniform) { programUniform = [...programUniform, ...programDefinedUniform]; } @@ -950,49 +981,45 @@ export class WebGPUBackend extends KernelBackend { this.makeUniforms(programUniform) ]; + inputs.forEach(input => { + this.commandQueueOwnedIds.add(input.dataId); + }); + this.commandQueueOwnedIds.add(output.dataId); + const bindGroup = this.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), + layout: program.pipeline.getBindGroupLayout(0), entries: bindings.map((b, i) => ({binding: i, resource: b})), }); - this.ensureCommandEncoderReady(); const pass = this.getComputePass(); + const shouldTimeProgram = this.activeTimers != null; - if (shouldTimeProgram) { - if (this.supportTimeQuery) { - // tslint:disable-next-line:no-any - (pass as any).writeTimestamp(this.querySet, 0); - } + if (shouldTimeProgram && this.supportTimeQuery) { + // tslint:disable-next-line:no-any + (pass as any).writeTimestamp(this.querySet, 0); } - pass.setPipeline(pipeline); + + pass.setPipeline(program.pipeline); pass.setBindGroup(0, bindGroup); pass.dispatchWorkgroups( program.dispatch[0], program.dispatch[1], program.dispatch[2]); - if (shouldTimeProgram) { - if (this.supportTimeQuery) { - // tslint:disable-next-line:no-any - (pass as any).writeTimestamp(this.querySet, 1); - } + + if (shouldTimeProgram && this.supportTimeQuery) { + // tslint:disable-next-line:no-any + (pass as 
any).writeTimestamp(this.querySet, 1); } this.dispatchNumberInEncoder++; - inputs.forEach(input => { - this.commandQueueOwnedIds.add(input.dataId); - }); - this.commandQueueOwnedIds.add(output.dataId); - if (env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE') as number <= this.dispatchNumberInEncoder) { this.submitQueue(); } - if (shouldTimeProgram) { this.activeTimers.push({ name: program.constructor.name, query: this.getQueryTime(this.querySet) }); } - return output; } async getTimeFromQuerySet(querySet: GPUQuerySet) { diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index a46eac7bdf6..9eebbc4ac0d 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -353,6 +353,139 @@ describeWebGPU('keeping data on gpu ', () => { }); }); +async function parallelCompilationCommon(webGPUBackend: WebGPUBackend) { + const startNumBytes = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU; + const startTensor = tf.memory().numTensors; + const startDataBuckets = webGPUBackend.numDataIds(); + + const a1 = tf.tensor1d([1, 1, 1]); + const b1 = tf.tensor1d([1, 1, 1]); + + // Parallel compile. + tf.env().set('WEBGPU_ENGINE_COMPILE_ONLY', true); + const c1 = tf.add(a1, b1); + await webGPUBackend.checkCompileCompletionAsync(); + + // Actual inference. + tf.env().set('WEBGPU_ENGINE_COMPILE_ONLY', false); + const c2 = tf.add(a1, b1); + expectArraysEqual(await c2.data(), [2, 2, 2]); + + tf.dispose([a1, b1, c1, c2]); + const endNumBytes = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU; + const endTensor = tf.memory().numTensors; + const endDataBuckets = webGPUBackend.numDataIds(); + + // We only check numBytesInGPU. For parallel compilation, + // numBytesInGPUAllocated will be more because of the two pass + // uploadToGPU, but they will all be freed, resulting in endNumbytes equal + // to startNumBytes. + expect(startNumBytes).toEqual(endNumBytes); + expect(startTensor).toEqual(endTensor); + expect(endDataBuckets).toEqual(startDataBuckets); +} + +describeWebGPU('parallel compilation', () => { + let prevBackend: string; + let savedWebGPUCPUForward: boolean; + let savedEngineCompileOnly: boolean; + let webGPUBackend: WebGPUBackend; + const customWebGPUBackendName = 'test-parallel'; + + beforeAll(() => { + prevBackend = tf.getBackend(); + }); + + beforeEach(async () => { + const adapter = await navigator.gpu.requestAdapter({}); + const device = await adapter.requestDevice({}); + webGPUBackend = new WebGPUBackend(device); + + tf.copyRegisteredKernels('webgpu', customWebGPUBackendName); + tf.registerBackend(customWebGPUBackendName, () => webGPUBackend); + tf.setBackend('test-parallel'); + + savedWebGPUCPUForward = tf.env().get('WEBGPU_CPU_FORWARD') as boolean; + savedEngineCompileOnly = + tf.env().get('WEBGPU_ENGINE_COMPILE_ONLY') as boolean; + tf.env().set('WEBGPU_CPU_FORWARD', false); + await tf.ready(); + }); + + afterEach(() => { + tf.env().set('WEBGPU_CPU_FORWARD', savedWebGPUCPUForward); + tf.env().set('WEBGPU_ENGINE_COMPILE_ONLY', savedEngineCompileOnly); + tf.setBackend(prevBackend); + tf.removeBackend(customWebGPUBackendName); + }); + + it('should work if pipeline cache not exist.', async () => { + await parallelCompilationCommon(webGPUBackend); + }); + + it('should work if pipeline cache exists.', async () => { + // This will create pipeline cache. 
+ const a0 = tf.tensor1d([1, 1, 1]); + const b0 = tf.tensor1d([1, 1, 1]); + const c0 = tf.add(a0, b0); + const data = await c0.data(); + expectArraysClose(data, [2, 2, 2]); + + await parallelCompilationCommon(webGPUBackend); + }); + + it('should work when running parallel compile again', async () => { + // This will create pipeline cache. + const a0 = tf.tensor1d([1, 1, 1]); + const b0 = tf.tensor1d([1, 1, 1]); + const c0 = tf.add(a0, b0); + const data = await c0.data(); + expectArraysClose(data, [2, 2, 2]); + + await parallelCompilationCommon(webGPUBackend); + await parallelCompilationCommon(webGPUBackend); + }); + + it('should not work if not call checkCompileCompletionAsync', async () => { + const a1 = tf.tensor1d([1, 1, 1]); + const b1 = tf.tensor1d([1, 1, 1]); + + // Parallel compile but not call await (tf.backend() as + // WebGPUBackend).checkCompileCompletionAsync(). + tf.env().set('WEBGPU_ENGINE_COMPILE_ONLY', true); + tf.add(a1, b1); + + // Actual inference. + tf.env().set('WEBGPU_ENGINE_COMPILE_ONLY', false); + expect(() => tf.add(a1, b1)) + .toThrowError( + 'Please call checkCompileCompletionAsync to ensure parallel compilation is done!'); + }); + + it('read data is invalid if parallel compilation is true', async () => { + const a1 = tf.tensor1d([1, 1, 1]); + const b1 = tf.tensor1d([1, 1, 1]); + + // Parallel compile. + tf.env().set('WEBGPU_ENGINE_COMPILE_ONLY', true); + const c1 = tf.add(a1, b1); + await (tf.backend() as WebGPUBackend).checkCompileCompletionAsync(); + // Read data is invalid. + expectArraysClose((await c1.data()).length, 0); + }); + + it('checkCompileCompletionAsync is nop if parallel compilation is false', + async () => { + const a1 = tf.tensor1d([1, 1, 1]); + const b1 = tf.tensor1d([1, 1, 1]); + // If parallel compilation is false, checkCompileCompletionAsync is nop. + tf.env().set('WEBGPU_ENGINE_COMPILE_ONLY', false); + const c1 = tf.add(a1, b1); + await (tf.backend() as WebGPUBackend).checkCompileCompletionAsync(); + expectArraysClose(await c1.data(), [2, 2, 2]); + }); +}); + function createStagingGPUBufferFromData( device: GPUDevice, data: number[], dtype: tf.DataType) { const bytesPerElement = 4; diff --git a/tfjs-backend-webgpu/src/flags_webgpu.ts b/tfjs-backend-webgpu/src/flags_webgpu.ts index 93f73e86ddf..eb11bfcbbb2 100644 --- a/tfjs-backend-webgpu/src/flags_webgpu.ts +++ b/tfjs-backend-webgpu/src/flags_webgpu.ts @@ -89,3 +89,6 @@ ENV.registerFlag('WEBGPU_CONV_SEPARATE_IM2COL_SHADER', () => false); * etc.). 'unary,conv2d' to print both unary and conv2d. */ ENV.registerFlag('WEBGPU_PRINT_SHADER', () => ''); + +/** Experimental flag, whether enter compile only phase. */ +ENV.registerFlag('WEBGPU_ENGINE_COMPILE_ONLY', () => false); diff --git a/tfjs-backend-webgpu/src/webgpu_program.ts b/tfjs-backend-webgpu/src/webgpu_program.ts index 862dfe068b5..258c7b07e59 100644 --- a/tfjs-backend-webgpu/src/webgpu_program.ts +++ b/tfjs-backend-webgpu/src/webgpu_program.ts @@ -15,7 +15,7 @@ * ============================================================================= */ -import {backend_util, DataType, env, Rank, ShapeMap, TensorInfo, util} from '@tensorflow/tfjs-core'; +import {backend_util, DataType, DataTypeMap, env, Rank, TensorInfo, util} from '@tensorflow/tfjs-core'; import {symbolicallyComputeStrides} from './shader_util'; @@ -49,21 +49,18 @@ export interface WebGPUProgram { // Each thread writes to workPerThread * workPerThread locations in the output // buffer. 
   workPerThread?: number;
+  pipeline?: GPUComputePipeline|Promise<GPUComputePipeline>;
   getUserCode: () => string;
 }
 
 export const compileProgram =
     (device: GPUDevice, program: WebGPUProgram, inputsData: InputInfo[],
-     output: TensorInfo, shaderKey: string): GPUComputePipeline => {
+     output: TensorInfo, parallelCompilation: boolean): GPUComputePipeline|
+    Promise<GPUComputePipeline> => {
       const outputData = {dtype: output.dtype, shape: output.shape};
       const source = makeShader(inputsData, outputData, program);
       const module = device.createShaderModule(
           {code: source, label: program.constructor.name});
-      const pipeline = device.createComputePipeline({
-        compute: {module, entryPoint: '_start'},
-        label: program.constructor.name,
-        layout: 'auto'
-      });
 
       let printShaderString = env().get('WEBGPU_PRINT_SHADER') as string;
       if (printShaderString !== '') {
@@ -71,13 +68,26 @@ export const compileProgram =
         const printShaderArray = printShaderString.split(',');
         if (printShaderString === 'all' ||
             printShaderArray.some(
-                item => shaderKey.toLowerCase().includes(item))) {
-          console.group(shaderKey);
+                item => program.shaderKey.toLowerCase().includes(item))) {
+          console.group(program.shaderKey);
           console.debug(source);
           console.groupEnd();
         }
       }
-      return pipeline;
+
+      if (parallelCompilation) {
+        return device.createComputePipelineAsync({
+          compute: {module, entryPoint: '_start'},
+          label: program.constructor.name,
+          layout: 'auto'
+        });
+      } else {
+        return device.createComputePipeline({
+          compute: {module, entryPoint: '_start'},
+          label: program.constructor.name,
+          layout: 'auto'
+        });
+      }
     };
 
 export const typeSnippet = (component: number, type = 'f32') => {
@@ -326,14 +336,22 @@ function makeShader(
 }
 
 export function makeShaderKey(
-    program: WebGPUProgram, shapes: Array, inputsData: InputInfo[],
+    program: WebGPUProgram, inputsData: InputInfo[],
     output: TensorInfo): string {
   let key = program.shaderKey;
   if (program.isFromPixels) {
     return key;
   }
 
-  const types = inputsData.map(d => d.dtype).concat(output.dtype);
+  const shapes: number[][] = [];
+  const types: Array = [];
+  inputsData.forEach(element => {
+    shapes.push(element.shape);
+    types.push(element.dtype);
+  });
+  shapes.push(output.shape);
+  types.push(output.dtype);
+
   const broadcastDims =
       inputsData.map(d => backend_util.getBroadcastDims(d.shape, output.shape));
   const inputShapesEqualsOutShape =

From c3f04be743b4b234b7916294a28860d9efd9db47 Mon Sep 17 00:00:00 2001
From: Yang Gu
Date: Sat, 6 May 2023 12:38:43 +0800
Subject: [PATCH 03/32] webgpu: Use self-contained info in GPUBuffer and GPUTexture

GPUTexture already includes width, height, format and usage, while GPUBuffer
already includes size and usage, so we no longer need extra data structs for
them. defaultGpuBufferUsage() is also removed, since the usage it returned
was not really a default.
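
To make the refactor concrete, here is a minimal illustrative sketch (not part
of the change; bufferKey and textureKey are hypothetical stand-ins for the
managers' existing key helpers). Because a GPUBuffer exposes its own size and
usage, and a GPUTexture its own width, height, format and usage, cache keys can
be derived from the resource itself, so release calls only need the resource:

  // Key a buffer by the attributes it already carries.
  function bufferKey(buffer: GPUBuffer): string {
    return `${buffer.size}_${buffer.usage}`;
  }

  // Key a texture by the attributes it already carries.
  function textureKey(texture: GPUTexture): string {
    return `${texture.width}_${texture.height}_${texture.format}_${texture.usage}`;
  }

  // e.g. releaseBuffer(buffer) can look up its free list via bufferKey(buffer)
  // instead of being passed size and usage separately.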
--- tfjs-backend-webgpu/src/backend_webgpu.ts | 190 +++++++----------- .../src/backend_webgpu_test.ts | 9 +- tfjs-backend-webgpu/src/buffer_manager.ts | 7 +- tfjs-backend-webgpu/src/kernels/FromPixels.ts | 18 +- tfjs-backend-webgpu/src/kernels/GatherV2.ts | 16 +- tfjs-backend-webgpu/src/kernels/Slice.ts | 4 +- tfjs-backend-webgpu/src/texture_manager.ts | 9 +- 7 files changed, 106 insertions(+), 147 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index bb9b0c62e9f..ac68025d57e 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -31,26 +31,12 @@ export interface WebGPUMemoryInfo extends backend_util.MemoryInfo { unreliable: boolean; } -export type BufferInfo = { - size: number, - usage: GPUBufferUsageFlags, - buffer: GPUBuffer -}; - -export type TextureInfo = { - width: number, - height: number, - format: GPUTextureFormat, - usage: GPUTextureUsageFlags, - texture: GPUTexture|GPUExternalTexture -}; - type TensorData = { values: BackendValues, dtype: DataType, shape: number[], refCount: number, - resourceInfo?: BufferInfo|TextureInfo, + resource?: GPUBuffer|GPUTexture|GPUExternalTexture, // external is true means we use the resource provided by users directly // (without a copy), so users should be responsible for its release. external?: boolean, @@ -133,9 +119,9 @@ export class WebGPUBackend extends KernelBackend { {[key: string]: GPUComputePipeline|Promise}; private programTimersStack: TimerNode[]; private querySet: GPUQuerySet; - private stagingPendingDisposal: BufferInfo[] = []; + private stagingPendingDisposal: GPUBuffer[] = []; private supportTimeQuery: boolean; - private uniformPendingDisposal: BufferInfo[] = []; + private uniformPendingDisposal: GPUBuffer[] = []; private uploadWaitMs = 0; private nextDataId(): number { @@ -189,11 +175,6 @@ export class WebGPUBackend extends KernelBackend { return 32; } - defaultGpuBufferUsage(): number { - return GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | - GPUBufferUsage.COPY_DST; - } - /** * Dispose the memory if the dataId has 0 refCount. Return true if the memory * is released or memory is not managed in this backend, false if memory is @@ -243,29 +224,21 @@ export class WebGPUBackend extends KernelBackend { releaseResource(dataId: DataId) { const tensorData = this.tensorMap.get(dataId); - if (!tensorData || !tensorData.resourceInfo) { + if (!tensorData || !tensorData.resource) { return; } + // If tensor's resource is from external, do not release. if (tensorData.external) { - tensorData.resourceInfo = null; + tensorData.resource = null; return; } - if ('texture' in tensorData.resourceInfo) { - const textureInfo = tensorData.resourceInfo; - if (textureInfo.texture instanceof GPUTexture) { - this.textureManager.releaseTexture( - textureInfo.texture, textureInfo.width, textureInfo.height, - textureInfo.format, textureInfo.usage); - } - textureInfo.texture = null; - } else { - const bufferInfo = tensorData.resourceInfo; - this.bufferManager.releaseBuffer( - bufferInfo.buffer, bufferInfo.size, bufferInfo.usage); - bufferInfo.buffer = null; + if (tensorData.resource instanceof GPUBuffer) { + this.bufferManager.releaseBuffer(tensorData.resource); + } else if (tensorData.resource instanceof GPUTexture) { + this.textureManager.releaseTexture(tensorData.resource); } - tensorData.resourceInfo = null; + tensorData.resource = null; } /** Return refCount of a `TensorData`. 
*/ @@ -327,10 +300,9 @@ export class WebGPUBackend extends KernelBackend { this.tensorMap.delete(d); }); this.uniformPendingDisposal.forEach( - b => this.bufferManager.releaseBuffer(b.buffer, b.size, b.usage)); + b => this.bufferManager.releaseBuffer(b)); this.stagingPendingDisposal.forEach( - b => - this.bufferManager.releaseBuffer(b.buffer, b.size, b.usage, false)); + b => this.bufferManager.releaseBuffer(b, false)); this.tensorDataPendingDisposal = []; this.uniformPendingDisposal = []; @@ -371,27 +343,27 @@ export class WebGPUBackend extends KernelBackend { }); } - public async getBufferData(buffer: GPUBuffer, size: number): - Promise { + public async getBufferData(buffer: GPUBuffer): Promise { if (env().getBool('WEBGPU_ENGINE_COMPILE_ONLY')) { console.warn( 'The data may be invalid since WEBGPU_ENGINE_COMPILE_ONLY is true, this can only be called when WEBGPU_ENGINE_COMPILE_ONLY is false'); return null; } - const staging = this.bufferManager.acquireBuffer( + const size = buffer.size; + const stagingBuffer = this.bufferManager.acquireBuffer( size, GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ); this.ensureCommandEncoderReady(); this.ensureComputePassEnded(); - this.currentCommandEncoder.copyBufferToBuffer(buffer, 0, staging, 0, size); + this.currentCommandEncoder.copyBufferToBuffer( + buffer, 0, stagingBuffer, 0, size); this.submitQueue(); - await staging.mapAsync(GPUMapMode.READ); - const values = staging.getMappedRange().slice(0); + await stagingBuffer.mapAsync(GPUMapMode.READ); + const values = stagingBuffer.getMappedRange().slice(0); - staging.unmap(); - if (staging != null) { - this.bufferManager.releaseBuffer( - staging, size, GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ); + stagingBuffer.unmap(); + if (stagingBuffer != null) { + this.bufferManager.releaseBuffer(stagingBuffer); } // Need to get texture from swapChain to enable profiling tool @@ -436,15 +408,15 @@ export class WebGPUBackend extends KernelBackend { const alphaModes: GPUCanvasAlphaMode[] = ['opaque', 'premultiplied']; - const bufferInfo = tensorData.resourceInfo as BufferInfo; - const bufSize = bufferInfo.size; + const buffer = tensorData.resource as GPUBuffer; + const bufferSize = buffer.size; util.assert( - bufSize % 4 === 0, + bufferSize % 4 === 0, () => 'Because there is 4 bytes for ' + 'one pixel, buffer size must be multiple of 4.'); - const pixelsSize = bufSize / 4; - const valsGPU = new ArrayBuffer(bufSize); - // TODO: adjust the reading window size according the `bufSize`. + const pixelsSize = bufferSize / 4; + const valsGPU = new ArrayBuffer(bufferSize); + // TODO: adjust the reading window size according the `bufferSize`. 
const canvasWidth = 256, canvasHeight = 256; const stagingDeviceStorage: OffscreenCanvas[] = alphaModes.map(_ => new OffscreenCanvas(canvasWidth, canvasHeight)); @@ -471,7 +443,7 @@ export class WebGPUBackend extends KernelBackend { this.ensureCommandEncoderReady(); this.currentCommandEncoder.copyBufferToTexture( { - buffer: bufferInfo.buffer, + buffer, bytesPerRow, offset, }, @@ -561,8 +533,7 @@ export class WebGPUBackend extends KernelBackend { vals = backend_util.mergeRealAndImagArrays( realValues as Float32Array, imagValues as Float32Array); } else { - const bufferInfo = tensorData.resourceInfo as BufferInfo; - const data = await this.getBufferData(bufferInfo.buffer, bufferInfo.size); + const data = await this.getBufferData(tensorData.resource as GPUBuffer); vals = util.convertBackendValuesAndArrayBuffer(data, tensorData.dtype); } this.convertAndCacheOnCPU(dataId, vals); @@ -571,7 +542,9 @@ export class WebGPUBackend extends KernelBackend { // The source GPUBuffer and destination GPUBuffer have the same size and // usage. - private copyBuffer(srcBuffer: GPUBuffer, size: number, usage: number) { + private copyBuffer(srcBuffer: GPUBuffer) { + const size = srcBuffer.size; + const usage = srcBuffer.usage; const dstBuffer = this.bufferManager.acquireBuffer(size, usage); this.ensureCommandEncoderReady(); this.ensureComputePassEnded(); @@ -585,23 +558,27 @@ export class WebGPUBackend extends KernelBackend { * Create a TF.js tensor out of an existing WebGPU buffer. */ override createTensorFromGPUData( - values: WebGPUData, shape: number[], dtype: DataType): Tensor { - let buffer = values.buffer; + webGPUData: WebGPUData, shape: number[], dtype: DataType): Tensor { + let buffer = webGPUData.buffer; if (dtype === 'complex64') { throw new Error(`Cannot write to a complex64 dtype. `); } const dataId = {id: this.nextDataId()}; - this.tensorMap.set( - dataId, - {dtype, shape, values: null, refCount: 1, external: values.zeroCopy}); + this.tensorMap.set(dataId, { + dtype, + shape, + values: null, + refCount: 1, + external: webGPUData.zeroCopy + }); const tensorData = this.tensorMap.get(dataId); const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) * util.sizeFromShape(tensorData.shape); - if (values.buffer.size < size) { + if (webGPUData.buffer.size < size) { throw new Error(`GPUBuffer size(${ - values.buffer.size}) is smaller than tensor size(${size})!`); + webGPUData.buffer.size}) is smaller than tensor size(${size})!`); } else if ( - (values.buffer.usage & + (webGPUData.buffer.usage & (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !== (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) { throw new Error( @@ -609,10 +586,10 @@ export class WebGPUBackend extends KernelBackend { } // Do buffer copy by default. 
- if (values.zeroCopy !== true) { - buffer = this.copyBuffer(buffer, size, buffer.usage); + if (webGPUData.zeroCopy !== true) { + buffer = this.copyBuffer(buffer); } - tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer}; + tensorData.resource = buffer; return engine().makeTensorFromDataId(dataId, shape, dtype, this); } @@ -622,13 +599,13 @@ export class WebGPUBackend extends KernelBackend { */ override readToGPU(dataId: DataId): GPUData { const srcTensorData = this.tensorMap.get(dataId); - const {values, dtype, shape, resourceInfo} = srcTensorData; + const {values, dtype, shape, resource} = srcTensorData; if (dtype === 'complex64') { throw new Error('Does not support reading buffer for complex64 dtype.'); } - if (resourceInfo == null) { + if (resource == null) { if (values != null) { throw new Error('Data is not on GPU but on CPU.'); } else { @@ -636,12 +613,14 @@ export class WebGPUBackend extends KernelBackend { } } - const size = (resourceInfo as BufferInfo).size; - const buffer = this.bufferManager.acquireBuffer(size, resourceInfo.usage); + const srcBuffer = resource as GPUBuffer; + const size = srcBuffer.size; + const usage = srcBuffer.usage; + const buffer = this.bufferManager.acquireBuffer(size, usage); this.ensureCommandEncoderReady(); this.ensureComputePassEnded(); this.currentCommandEncoder.copyBufferToBuffer( - (resourceInfo as BufferInfo).buffer, 0, buffer, 0, size); + resource as GPUBuffer, 0, buffer, 0, size); this.submitQueue(); const tensorInfo = this.makeTensorInfo(shape, dtype); @@ -649,8 +628,7 @@ export class WebGPUBackend extends KernelBackend { const tensorRef = engine().makeTensorFromTensorInfo(tensorInfo); const tensorData = this.tensorMap.get(tensorInfo.dataId); - tensorData - .resourceInfo = {size, usage: this.defaultGpuBufferUsage(), buffer}; + tensorData.resource = buffer; return {tensorRef, buffer}; } @@ -743,16 +721,16 @@ export class WebGPUBackend extends KernelBackend { } const tensorData = this.tensorMap.get(tensor.dataId); - if ('texture' in tensorData.resourceInfo) { - const info = tensorData.resourceInfo; - if (info.texture instanceof GPUExternalTexture) { - return info.texture; - } else { - return info.texture.createView(); - } + const resource = tensorData.resource; + + if (resource instanceof GPUBuffer) { + return {buffer: resource}; } - const bufferInfo = tensorData.resourceInfo; - return {offset: 0, size: bufferInfo.size, buffer: bufferInfo.buffer}; + if (resource instanceof GPUTexture) { + return resource.createView(); + } + // GPUExternalTexture + return resource; } async getQueryTime(query: GPUQuerySet): Promise { @@ -766,16 +744,17 @@ export class WebGPUBackend extends KernelBackend { uploadToGPU(dataId: DataId): void { const tensorData = this.tensorMap.get(dataId); // Already on the GPU. 
- if (tensorData.resourceInfo) { + if (tensorData.resource != null) { return; } const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) * util.sizeFromShape(tensorData.shape); let buffer; + const usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | + GPUBufferUsage.COPY_DST; if (tensorData.values) { - buffer = this.bufferManager.acquireBuffer( - size, this.defaultGpuBufferUsage(), true); + buffer = this.bufferManager.acquireBuffer(size, usage, true); if (buffer.mapState === 'unmapped') { const stagingBuffer = this.bufferManager.acquireBuffer( size, GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, true, @@ -792,11 +771,7 @@ export class WebGPUBackend extends KernelBackend { this.currentCommandEncoder.copyBufferToBuffer( stagingBuffer, 0, buffer, 0, size); - this.stagingPendingDisposal.push({ - size, - usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, - buffer: stagingBuffer - }); + this.stagingPendingDisposal.push(stagingBuffer); } else { const arrayBuffer = buffer.getMappedRange(); if (tensorData.dtype === 'int32' || tensorData.dtype === 'bool') { @@ -810,11 +785,9 @@ export class WebGPUBackend extends KernelBackend { // Once uploaded, don't store the values on cpu. tensorData.values = null; } else { - buffer = - this.bufferManager.acquireBuffer(size, this.defaultGpuBufferUsage()); + buffer = this.bufferManager.acquireBuffer(size, usage); } - tensorData - .resourceInfo = {size, usage: this.defaultGpuBufferUsage(), buffer}; + tensorData.resource = buffer; } private makeUniforms(programUniform: ProgramUniform): GPUBindingResource { @@ -880,13 +853,7 @@ export class WebGPUBackend extends KernelBackend { const uniformBuffer = this.bufferManager.acquireBuffer( currentOffset, GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM); this.queue.writeBuffer(uniformBuffer, 0, arrayBuffer, 0, currentOffset); - - const uniformInfo = { - size: currentOffset, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM, - buffer: uniformBuffer - }; - this.uniformPendingDisposal.push(uniformInfo); + this.uniformPendingDisposal.push(uniformBuffer); return {offset: 0, size: currentOffset, buffer: uniformBuffer}; } @@ -1037,11 +1004,8 @@ export class WebGPUBackend extends KernelBackend { const arrayBuf = new BigUint64Array(dst.getMappedRange()); const timeElapsedNanos = Number((arrayBuf[1] - arrayBuf[0])); dst.unmap(); - this.bufferManager.releaseBuffer( - dst, 16, GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST); - this.bufferManager.releaseBuffer( - queryBuffer, 16, - GPUBufferUsage.COPY_SRC | GPUBufferUsage.QUERY_RESOLVE); + this.bufferManager.releaseBuffer(dst); + this.bufferManager.releaseBuffer(queryBuffer); // Return milliseconds. return timeElapsedNanos / 1000000; } @@ -1051,7 +1015,7 @@ export class WebGPUBackend extends KernelBackend { sizeThreshold = CPU_HANDOFF_SIZE_THRESHOLD): boolean { return env().getBool('WEBGPU_CPU_FORWARD') && inputs.every( - input => this.tensorMap.get(input.dataId).resourceInfo == null && + input => this.tensorMap.get(input.dataId).resource == null && util.sizeFromShape(input.shape) < sizeThreshold); } diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index 9eebbc4ac0d..e8c35d6ff6f 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -256,8 +256,7 @@ describeWebGPU('keeping data on gpu ', () => { `Unexpected type. Actual: ${res.tensorRef.dtype}. 
` + `Expected: float32`); } - const resData = - await webGPUBackend.getBufferData(res.buffer, res.buffer.size); + const resData = await webGPUBackend.getBufferData(res.buffer); const values = tf.util.convertBackendValuesAndArrayBuffer( resData, res.tensorRef.dtype); expectArraysEqual(values, data); @@ -277,8 +276,7 @@ describeWebGPU('keeping data on gpu ', () => { `Unexpected type. Actual: ${res.tensorRef.dtype}. ` + `Expected: float32`); } - const resData = - await webGPUBackend.getBufferData(res.buffer, res.buffer.size); + const resData = await webGPUBackend.getBufferData(res.buffer); const values = tf.util.convertBackendValuesAndArrayBuffer( resData, res.tensorRef.dtype); expectArraysEqual(values, data); @@ -324,8 +322,7 @@ describeWebGPU('keeping data on gpu ', () => { expect(endDataBuckets).toEqual(startDataBuckets + 1); const res = result as unknown as GPUData; - const resData = - await webGPUBackend.getBufferData(res.buffer, res.buffer.size); + const resData = await webGPUBackend.getBufferData(res.buffer); const values = tf.util.convertBackendValuesAndArrayBuffer( resData, res.tensorRef.dtype); expectArraysEqual(values, data); diff --git a/tfjs-backend-webgpu/src/buffer_manager.ts b/tfjs-backend-webgpu/src/buffer_manager.ts index 0790aa8523a..bafcebcc3b8 100644 --- a/tfjs-backend-webgpu/src/buffer_manager.ts +++ b/tfjs-backend-webgpu/src/buffer_manager.ts @@ -59,13 +59,14 @@ export class BufferManager { return buffer; } - releaseBuffer( - buffer: GPUBuffer, size: number, usage: GPUBufferUsageFlags, - reuse = true) { + releaseBuffer(buffer: GPUBuffer, reuse = true) { if (this.freeBuffers.size === 0) { return; } + const size = buffer.size; + const usage = buffer.usage; + const key = getBufferKey(size, usage); const bufferArray = this.usedBuffers.get(key); const index = bufferArray.indexOf(buffer); diff --git a/tfjs-backend-webgpu/src/kernels/FromPixels.ts b/tfjs-backend-webgpu/src/kernels/FromPixels.ts index 982cdc7ad33..21cbf631ce2 100644 --- a/tfjs-backend-webgpu/src/kernels/FromPixels.ts +++ b/tfjs-backend-webgpu/src/kernels/FromPixels.ts @@ -19,7 +19,7 @@ import {env, KernelConfig, KernelFunc} from '@tensorflow/tfjs-core'; import {FromPixels, FromPixelsAttrs, FromPixelsInputs, util} from '@tensorflow/tfjs-core'; import {backend_util, TensorInfo} from '@tensorflow/tfjs-core'; -import {TextureInfo, WebGPUBackend} from '../backend_webgpu'; +import {WebGPUBackend} from '../backend_webgpu'; import {FromPixelsProgram} from '../from_pixels_webgpu'; export const fromPixelsConfig: KernelConfig = { @@ -69,16 +69,10 @@ export function fromPixels(args: { false && env().getBool('WEBGPU_IMPORT_EXTERNAL_TEXTURE') && isVideo; const isVideoOrImage = isVideo || isImage; if (isImageBitmap || isCanvas || isVideoOrImage) { - let textureInfo: TextureInfo; + let resource; if (importVideo) { - textureInfo = { - width, - height, - format: null, - usage: null, - texture: backend.device.importExternalTexture( - {source: pixels as HTMLVideoElement}) - }; + resource = backend.device.importExternalTexture( + {source: pixels as HTMLVideoElement}); } else { if (isVideoOrImage) { const newWillReadFrequently = @@ -104,7 +98,7 @@ export function fromPixels(args: { backend.queue.copyExternalImageToTexture( {source: pixels as HTMLCanvasElement | ImageBitmap}, {texture}, [outputShape[1], outputShape[0]]); - textureInfo = {width, height, format, usage, texture}; + resource = texture; } const size = util.sizeFromShape(outputShape); @@ -118,7 +112,7 @@ export function fromPixels(args: { ]; const input = 
backend.makeTensorInfo([height, width], 'int32'); const info = backend.tensorMap.get(input.dataId); - info.resourceInfo = textureInfo; + info.resource = resource; const result = backend.runWebGPUProgram(program, [input], 'int32', uniformData); diff --git a/tfjs-backend-webgpu/src/kernels/GatherV2.ts b/tfjs-backend-webgpu/src/kernels/GatherV2.ts index 24a26eeb6ce..000057ccfd0 100644 --- a/tfjs-backend-webgpu/src/kernels/GatherV2.ts +++ b/tfjs-backend-webgpu/src/kernels/GatherV2.ts @@ -18,9 +18,9 @@ import {backend_util, buffer, GatherV2, GatherV2Attrs, GatherV2Inputs, KernelConfig, KernelFunc, Rank, TensorBuffer, TensorInfo, TypedArray, util} from '@tensorflow/tfjs-core'; import {WebGPUBackend} from '../backend_webgpu'; +import {GatherProgram} from '../gather_webgpu'; import {gatherV2ImplCPU} from '../kernel_utils/shared'; -import {GatherProgram} from '../gather_webgpu'; import {reshape} from './Reshape'; export function gatherV2( @@ -68,16 +68,16 @@ export function gatherV2( ]; if (backend.shouldExecuteOnCPU([x, indices])) { - const indicesBufferInfo = backend.tensorMap.get(flattenIndex.dataId); - const indicesValues = indicesBufferInfo.values as TypedArray; - const indicesBuf = + const indicesTensorData = backend.tensorMap.get(flattenIndex.dataId); + const indicesValues = indicesTensorData.values as TypedArray; + const indicesBuffer = buffer(flattenIndex.shape, flattenIndex.dtype, indicesValues) as TensorBuffer; - const xBufferInfo = backend.tensorMap.get(flattenX.dataId); - const xValues = xBufferInfo.values as TypedArray; - const xBuf = + const flattenXTensorData = backend.tensorMap.get(flattenX.dataId); + const xValues = flattenXTensorData.values as TypedArray; + const xBuffer = buffer(flattenX.shape, flattenX.dtype, xValues) as TensorBuffer; - const outBuf = gatherV2ImplCPU(xBuf, indicesBuf, flattenOutputShape); + const outBuf = gatherV2ImplCPU(xBuffer, indicesBuffer, flattenOutputShape); toDispose.forEach(t => backend.disposeData(t.dataId)); diff --git a/tfjs-backend-webgpu/src/kernels/Slice.ts b/tfjs-backend-webgpu/src/kernels/Slice.ts index 6cb72fc5230..d968265013f 100644 --- a/tfjs-backend-webgpu/src/kernels/Slice.ts +++ b/tfjs-backend-webgpu/src/kernels/Slice.ts @@ -32,9 +32,9 @@ export function slice( slice_util.assertParamsValid(x, $begin, $size); if (backend.shouldExecuteOnCPU([x]) || x.dtype === 'string') { - const xBufferInfo = backend.tensorMap.get(x.dataId); + const xTensorData = backend.tensorMap.get(x.dataId); const outValues = sliceImplCPU( - xBufferInfo.values as TypedArray, $begin, $size, x.shape, x.dtype); + xTensorData.values as TypedArray, $begin, $size, x.shape, x.dtype); return backend.makeTensorInfo($size, x.dtype, outValues); } diff --git a/tfjs-backend-webgpu/src/texture_manager.ts b/tfjs-backend-webgpu/src/texture_manager.ts index 162bef382ca..0bdc2761281 100644 --- a/tfjs-backend-webgpu/src/texture_manager.ts +++ b/tfjs-backend-webgpu/src/texture_manager.ts @@ -63,13 +63,16 @@ export class TextureManager { return newTexture; } - releaseTexture( - texture: GPUTexture, width: number, height: number, - format: GPUTextureFormat, usage: GPUTextureUsageFlags) { + releaseTexture(texture: GPUTexture) { if (this.freeTextures.size === 0) { return; } + const width = texture.width; + const height = texture.height; + const format = texture.format; + const usage = texture.usage; + const key = getTextureKey(width, height, format, usage); if (!this.freeTextures.has(key)) { this.freeTextures.set(key, []); From cb9a98b90f5f2d52327137f15410d06705d51715 Mon Sep 17 00:00:00 2001 
From: Xu Xing Date: Sat, 6 May 2023 14:18:27 +0800 Subject: [PATCH 04/32] [e2e] Enable webgpu intergration test (#7543) --- e2e/integration_tests/backends_test.ts | 72 ++++++++++--------- e2e/integration_tests/constants.ts | 2 +- e2e/integration_tests/convert_predict.ts | 2 + e2e/integration_tests/cpu_forwarding_test.ts | 60 +++------------- e2e/integration_tests/create_save_predict.ts | 4 +- e2e/integration_tests/grad_layers_test.ts | 14 ++-- .../graph_model_golden_tests.ts | 7 +- e2e/integration_tests/load_predict_test.ts | 39 ++++------ e2e/integration_tests/memory_leak_test.ts | 2 + e2e/integration_tests/setup_test.ts | 14 +++- e2e/karma.conf.js | 13 +++- e2e/package.json | 1 + e2e/scripts/run-browserstack-tests.sh | 5 +- 13 files changed, 109 insertions(+), 126 deletions(-) diff --git a/e2e/integration_tests/backends_test.ts b/e2e/integration_tests/backends_test.ts index fa2a8d66c53..b3cc7630a87 100644 --- a/e2e/integration_tests/backends_test.ts +++ b/e2e/integration_tests/backends_test.ts @@ -17,6 +17,7 @@ import '@tensorflow/tfjs-backend-cpu'; import '@tensorflow/tfjs-backend-webgl'; +import '@tensorflow/tfjs-backend-webgpu'; import * as tfc from '@tensorflow/tfjs-core'; // tslint:disable-next-line: no-imports-from-dist @@ -27,27 +28,28 @@ import {SMOKE} from './constants'; /** * This file tests backend switching scenario. */ - +// TODO: Support backend switching between wasm and cpu. +// https://github.com/tensorflow/tfjs/issues/7623 describeWithFlags( `${SMOKE} backend switching`, { - predicate: testEnv => testEnv.backendName === 'webgl' && - tfc.findBackend('webgl') !== null && tfc.findBackend('cpu') !== null + predicate: testEnv => + testEnv.backendName !== 'cpu' && testEnv.backendName !== 'wasm' }, - () => { - it(`from webgl to cpu.`, async () => { - await tfc.setBackend('webgl'); + (env) => { + it(`from ${env.name} to cpu.`, async () => { + await tfc.setBackend(env.name); - const webglBefore = tfc.engine().backend.numDataIds(); + const backendBefore = tfc.engine().backend.numDataIds(); const input = tfc.tensor2d([1, 1, 1, 1], [2, 2], 'float32'); - // input is stored in webgl backend. + // input is stored in backend. const inputReshaped = tfc.reshape(input, [2, 2]); - const webglAfter = tfc.engine().backend.numDataIds(); + const backendAfter = tfc.engine().backend.numDataIds(); - expect(webglAfter).toEqual(webglBefore + 1); + expect(backendAfter).toEqual(backendBefore + 1); await tfc.setBackend('cpu'); @@ -56,8 +58,9 @@ describeWithFlags( const inputReshaped2 = tfc.reshape(inputReshaped, [2, 2]); // input moved to cpu. - // Because input is moved to cpu, data should be deleted from webgl - expect(tfc.findBackend('webgl').numDataIds()).toEqual(webglAfter - 1); + // Because input is moved to cpu, data should be deleted from backend. + expect(tfc.findBackend(env.name).numDataIds()) + .toEqual(backendAfter - 1); const cpuAfter = tfc.engine().backend.numDataIds(); @@ -77,7 +80,7 @@ describeWithFlags( expect(after).toBe(cpuBefore); }); - it(`from cpu to webgl.`, async () => { + it(`from cpu to ${env.name}.`, async () => { await tfc.setBackend('cpu'); const cpuBefore = tfc.engine().backend.numDataIds(); @@ -91,46 +94,47 @@ describeWithFlags( expect(cpuAfter).toEqual(cpuBefore + 1); - await tfc.setBackend('webgl'); + await tfc.setBackend(env.name); - const webglBefore = tfc.engine().backend.numDataIds(); + const backendBefore = tfc.engine().backend.numDataIds(); const inputReshaped2 = tfc.reshape(inputReshaped, [2, 2]); - // input moved to webgl. + // input moved to webgl or webgpu. 
- // Because input is moved to webgl, data should be deleted from cpu + // Because input is moved to backend, data should be deleted + // from cpu. expect(tfc.findBackend('cpu').numDataIds()).toEqual(cpuAfter - 1); - const webglAfter = tfc.engine().backend.numDataIds(); + const backendAfter = tfc.engine().backend.numDataIds(); - expect(webglAfter).toEqual(webglBefore + 1); + expect(backendAfter).toEqual(backendBefore + 1); input.dispose(); - expect(tfc.engine().backend.numDataIds()).toEqual(webglAfter); + expect(tfc.engine().backend.numDataIds()).toEqual(backendAfter); inputReshaped.dispose(); - expect(tfc.engine().backend.numDataIds()).toEqual(webglAfter); + expect(tfc.engine().backend.numDataIds()).toEqual(backendAfter); inputReshaped2.dispose(); const after = tfc.engine().backend.numDataIds(); - expect(after).toBe(webglBefore); + expect(after).toBe(backendBefore); }); it('can execute op with data from mixed backends', async () => { const numTensors = tfc.memory().numTensors; - const webglNumDataIds = tfc.findBackend('webgl').numDataIds(); + const backendNumDataIds = tfc.findBackend(env.name).numDataIds(); const cpuNumDataIds = tfc.findBackend('cpu').numDataIds(); await tfc.setBackend('cpu'); // This scalar lives in cpu. const a = tfc.scalar(5); - await tfc.setBackend('webgl'); - // This scalar lives in webgl. + await tfc.setBackend(env.name); + // This scalar lives in webgl or webgpu. const b = tfc.scalar(3); // Verify that ops can execute with mixed backend data. @@ -141,32 +145,34 @@ describeWithFlags( tfc.test_util.expectArraysClose(await result.data(), [8]); expect(tfc.findBackend('cpu').numDataIds()).toBe(cpuNumDataIds + 3); - await tfc.setBackend('webgl'); + await tfc.setBackend(env.name); tfc.test_util.expectArraysClose(await tfc.add(a, b).data(), [8]); - expect(tfc.findBackend('webgl').numDataIds()).toBe(webglNumDataIds + 3); + expect(tfc.findBackend(env.name).numDataIds()) + .toBe(backendNumDataIds + 3); tfc.engine().endScope(); expect(tfc.memory().numTensors).toBe(numTensors + 2); - expect(tfc.findBackend('webgl').numDataIds()).toBe(webglNumDataIds + 2); + expect(tfc.findBackend(env.name).numDataIds()) + .toBe(backendNumDataIds + 2); expect(tfc.findBackend('cpu').numDataIds()).toBe(cpuNumDataIds); tfc.dispose([a, b]); expect(tfc.memory().numTensors).toBe(numTensors); - expect(tfc.findBackend('webgl').numDataIds()).toBe(webglNumDataIds); + expect(tfc.findBackend(env.name).numDataIds()).toBe(backendNumDataIds); expect(tfc.findBackend('cpu').numDataIds()).toBe(cpuNumDataIds); }); // tslint:disable-next-line: ban - xit('can move complex tensor from cpu to webgl.', async () => { + xit(`can move complex tensor from cpu to ${env.name}.`, async () => { await tfc.setBackend('cpu'); const real1 = tfc.tensor1d([1]); const imag1 = tfc.tensor1d([2]); const complex1 = tfc.complex(real1, imag1); - await tfc.setBackend('webgl'); + await tfc.setBackend(env.name); const real2 = tfc.tensor1d([3]); const imag2 = tfc.tensor1d([4]); @@ -178,8 +184,8 @@ describeWithFlags( }); // tslint:disable-next-line: ban - xit('can move complex tensor from webgl to cpu.', async () => { - await tfc.setBackend('webgl'); + xit(`can move complex tensor from ${env.name} to cpu.`, async () => { + await tfc.setBackend(env.name); const real1 = tfc.tensor1d([1]); const imag1 = tfc.tensor1d([2]); diff --git a/e2e/integration_tests/constants.ts b/e2e/integration_tests/constants.ts index fe2fd38a641..e6cba15c0e1 100644 --- a/e2e/integration_tests/constants.ts +++ b/e2e/integration_tests/constants.ts @@ -27,7 +27,7 @@ export 
const GOLDEN = '#GOLDEN'; export const TAGS = [SMOKE, REGRESSION, GOLDEN]; /** Testing backends. */ -export const BACKENDS = ['cpu', 'webgl']; +export const BACKENDS = ['cpu', 'webgl', 'webgpu']; /** Testing models for CUJ: create -> save -> predict. */ export const LAYERS_MODELS = [ diff --git a/e2e/integration_tests/convert_predict.ts b/e2e/integration_tests/convert_predict.ts index 5d0b94cfcaa..97f67b23fe5 100644 --- a/e2e/integration_tests/convert_predict.ts +++ b/e2e/integration_tests/convert_predict.ts @@ -26,6 +26,7 @@ */ import '@tensorflow/tfjs-backend-cpu'; import '@tensorflow/tfjs-backend-webgl'; +import '@tensorflow/tfjs-backend-webgpu'; import * as tfconverter from '@tensorflow/tfjs-converter'; import * as tfc from '@tensorflow/tfjs-core'; @@ -61,6 +62,7 @@ describeWithFlags(`${REGRESSION} convert_predict`, ALL_ENVS, (env) => { continue; } it(`${model}.`, async () => { + await tfc.setBackend(env.name); let inputsNames: string[]; let inputsData: tfc.TypedArray[]; let inputsShapes: number[][]; diff --git a/e2e/integration_tests/cpu_forwarding_test.ts b/e2e/integration_tests/cpu_forwarding_test.ts index 8e561656bb8..02a671b5edd 100644 --- a/e2e/integration_tests/cpu_forwarding_test.ts +++ b/e2e/integration_tests/cpu_forwarding_test.ts @@ -17,6 +17,7 @@ import '@tensorflow/tfjs-backend-cpu'; import '@tensorflow/tfjs-backend-webgl'; +import '@tensorflow/tfjs-backend-webgpu'; import * as tfc from '@tensorflow/tfjs-core'; // tslint:disable-next-line: no-imports-from-dist @@ -29,26 +30,27 @@ import {SMOKE} from './constants'; */ describeWithFlags( - `${SMOKE} cpu forwarding (webgl->cpu)`, { - predicate: testEnv => testEnv.backendName === 'webgl' && - tfc.findBackend('webgl') !== null && tfc.findBackend('cpu') !== null - }, + `${SMOKE} cpu forwarding)`, + {predicate: testEnv => testEnv.backendName !== 'cpu'}, - () => { + (env) => { let webglCpuForwardFlagSaved: boolean; + let webgpuCpuForwardFlagSaved: boolean; - beforeAll(() => { + beforeAll(async () => { webglCpuForwardFlagSaved = tfc.env().getBool('WEBGL_CPU_FORWARD'); tfc.env().set('WEBGL_CPU_FORWARD', true); + webgpuCpuForwardFlagSaved = tfc.env().getBool('WEBGPU_CPU_FORWARD'); + tfc.env().set('WEBGPU_CPU_FORWARD', true); + await tfc.setBackend(env.name); }); afterAll(() => { tfc.env().set('WEBGL_CPU_FORWARD', webglCpuForwardFlagSaved); + tfc.env().set('WEBGPU_CPU_FORWARD', webgpuCpuForwardFlagSaved); }); it('should work for slice.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor3d([1, 2, 3, 4, 5, 6, 7, 8], [2, 2, 2]); const result = a.slice([0, 1, 1]); expect(result.shape).toEqual([2, 1, 1]); @@ -56,8 +58,6 @@ describeWithFlags( }); it('should work for stridedSlice.', async () => { - await tfc.setBackend('webgl'); - const t = tfc.tensor2d([ [1, 2, 3, 4, 5], [2, 3, 4, 5, 6], @@ -84,8 +84,6 @@ describeWithFlags( }); it('should work for concat.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([3]); const b = tfc.tensor1d([5]); @@ -95,8 +93,6 @@ describeWithFlags( }); it('should work for neg.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([1, -3, 2, 7, -4]); const result = tfc.neg(a); tfc.test_util.expectArraysClose( @@ -104,8 +100,6 @@ describeWithFlags( }); it('should work for multiply.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor2d([1, 2, -3, -4], [2, 2]); const b = tfc.tensor2d([5, 3, 4, -7], [2, 2]); const expected = [5, 6, -12, 28]; @@ -116,8 +110,6 @@ describeWithFlags( }); it('should work for gather.', async () => { - await 
tfc.setBackend('webgl'); - const t = tfc.tensor1d([1, 2, 3]); const t2 = tfc.gather(t, tfc.scalar(1, 'int32'), 0); @@ -127,16 +119,12 @@ describeWithFlags( }); it('should work for prod.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor2d([1, 2, 3, 0, 0, 1], [3, 2]); const result = tfc.prod(a); tfc.test_util.expectArraysClose(await result.data(), 0); }); it('should work for less.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([1, 4, 5], 'int32'); const b = tfc.tensor1d([2, 3, 5], 'int32'); const res = tfc.less(a, b); @@ -146,8 +134,6 @@ describeWithFlags( }); it('should work for greater.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([1, 4, 5], 'int32'); const b = tfc.tensor1d([2, 3, 5], 'int32'); const res = tfc.greater(a, b); @@ -157,8 +143,6 @@ describeWithFlags( }); it('should work for minimum.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([0.5, 3, -0.1, -4]); const b = tfc.tensor1d([0.2, 0.4, 0.25, 0.15]); const result = tfc.minimum(a, b); @@ -169,8 +153,6 @@ describeWithFlags( }); it('should work for maximum.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([0.5, 3, -0.1, -4]); const b = tfc.tensor1d([0.2, 0.4, 0.25, 0.15]); const result = tfc.maximum(a, b); @@ -181,16 +163,12 @@ describeWithFlags( }); it('should work for max.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([3, -1, 0, 100, -7, 2]); const r = tfc.max(a); tfc.test_util.expectArraysClose(await r.data(), 100); }); it('should work for add.', async () => { - await tfc.setBackend('webgl'); - const c = tfc.scalar(5); const a = tfc.tensor1d([1, 2, 3]); @@ -200,8 +178,6 @@ describeWithFlags( }); it('should work for sub.', async () => { - await tfc.setBackend('webgl'); - const c = tfc.scalar(5); const a = tfc.tensor1d([7, 2, 3]); @@ -211,16 +187,12 @@ describeWithFlags( }); it('should work for ceil.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([1.5, 2.1, -1.4]); const r = tfc.ceil(a); tfc.test_util.expectArraysClose(await r.data(), [2, 3, -1]); }); it('should work for floor.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([1.5, 2.1, -1.4]); const r = tfc.floor(a); @@ -228,8 +200,6 @@ describeWithFlags( }); it('should work for exp.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([1, 2, 0]); const r = tfc.exp(a); @@ -238,8 +208,6 @@ describeWithFlags( }); it('should work for expm1.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([1, 2, 0]); const r = tfc.expm1(a); @@ -248,8 +216,6 @@ describeWithFlags( }); it('should work for log.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([1, 2]); const r = tfc.log(a); tfc.test_util.expectArraysClose( @@ -257,8 +223,6 @@ describeWithFlags( }); it('should work for rsqrt.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([2, 4]); const r = tfc.rsqrt(a); tfc.test_util.expectArraysClose( @@ -266,16 +230,12 @@ describeWithFlags( }); it('should work for abs.', async () => { - await tfc.setBackend('webgl'); - const a = tfc.tensor1d([1, -2, 0, 3, -0.1]); const result = tfc.abs(a); tfc.test_util.expectArraysClose(await result.data(), [1, 2, 0, 3, 0.1]); }); it('should work for transpose.', async () => { - await tfc.setBackend('webgl'); - const t = tfc.tensor2d([1, 11, 2, 22, 3, 33, 4, 44], [2, 4]); const t2 = tfc.transpose(t, [1, 0]); diff --git a/e2e/integration_tests/create_save_predict.ts 
b/e2e/integration_tests/create_save_predict.ts index 8ad642a7413..baea45d0ee3 100644 --- a/e2e/integration_tests/create_save_predict.ts +++ b/e2e/integration_tests/create_save_predict.ts @@ -17,6 +17,7 @@ import '@tensorflow/tfjs-backend-cpu'; import '@tensorflow/tfjs-backend-webgl'; +import '@tensorflow/tfjs-backend-webgpu'; import * as tfc from '@tensorflow/tfjs-core'; // tslint:disable-next-line: no-imports-from-dist @@ -38,7 +39,7 @@ const DATA_URL = 'create_save_predict_data'; * - Make inference using each backends, and validate the results against * Keras results. */ -describeWithFlags(`${REGRESSION} create_save_predict`, ALL_ENVS, () => { +describeWithFlags(`${REGRESSION} create_save_predict`, ALL_ENVS, (env) => { let originalTimeout: number; beforeAll(() => { @@ -54,6 +55,7 @@ describeWithFlags(`${REGRESSION} create_save_predict`, ALL_ENVS, () => { LAYERS_MODELS.forEach(model => { it(`${model}.`, async () => { + await tfc.setBackend(env.name); let inputsData: tfc.TypedArray[]; let inputsShapes: number[][]; let kerasOutputData: tfc.TypedArray[]; diff --git a/e2e/integration_tests/grad_layers_test.ts b/e2e/integration_tests/grad_layers_test.ts index 45a0d8e647b..3f20efc9a66 100644 --- a/e2e/integration_tests/grad_layers_test.ts +++ b/e2e/integration_tests/grad_layers_test.ts @@ -17,12 +17,11 @@ import '@tensorflow/tfjs-backend-cpu'; import '@tensorflow/tfjs-backend-webgl'; +import '@tensorflow/tfjs-backend-webgpu'; import * as tfc from '@tensorflow/tfjs-core'; // tslint:disable-next-line: no-imports-from-dist -import '@tensorflow/tfjs-core/dist/register_all_gradients'; -// tslint:disable-next-line: no-imports-from-dist -import {describeWithFlags, Constraints} from '@tensorflow/tfjs-core/dist/jasmine_util'; +import {Constraints, describeWithFlags} from '@tensorflow/tfjs-core/dist/jasmine_util'; import * as tfl from '@tensorflow/tfjs-layers'; import {SMOKE} from './constants'; @@ -31,19 +30,16 @@ import {SMOKE} from './constants'; const NOT_WASM: Constraints = { predicate: testEnv => testEnv.backendName !== 'wasm', }; - /** * Tests that tf.grad works for layers models. * Regression test for https://github.com/tensorflow/tfjs/issues/4130 */ describe(`${SMOKE} tf.grad for layers models`, () => { - describeWithFlags(`layers_model`, NOT_WASM, () => { - let model: tfl.Sequential; - + describeWithFlags(`layers_model`, NOT_WASM, (env) => { it(`can compute grad of prediction`, async () => { - model = tfl.sequential(); + await tfc.setBackend(env.name); + const model = tfl.sequential(); model.add(tfl.layers.dense({inputShape: [1], units: 1})); - const forward = (x: tfc.Tensor) => model.predict(x) as tfc.Tensor; const grad = tfc.grad(forward); diff --git a/e2e/integration_tests/graph_model_golden_tests.ts b/e2e/integration_tests/graph_model_golden_tests.ts index d52fa24511b..e467e5760cf 100644 --- a/e2e/integration_tests/graph_model_golden_tests.ts +++ b/e2e/integration_tests/graph_model_golden_tests.ts @@ -26,22 +26,21 @@ import {GOLDEN, KARMA_SERVER} from './constants'; import * as GOLDEN_MODEL_DATA_FILENAMES from './graph_model_golden_data/filenames.json'; import {GraphModeGoldenData, TensorDetail} from './types'; - /** Directory that stores the model golden data. 
*/ const DATA_URL = 'graph_model_golden_data'; const INTERMEDIATE_NODE_TESTS_NUM = 5; - -describeWithFlags(`${GOLDEN} graph_model_golden`, ALL_ENVS, () => { +describeWithFlags(`${GOLDEN} graph_model_golden`, ALL_ENVS, (env) => { let originalTimeout: number; - beforeAll(() => { + beforeAll(async () => { // This test needs more time to finish the async fetch, adjusting // jasmine timeout for this test to avoid flakiness. See jasmine // documentation for detail: // https://jasmine.github.io/2.0/introduction.html#section-42 originalTimeout = jasmine.DEFAULT_TIMEOUT_INTERVAL; jasmine.DEFAULT_TIMEOUT_INTERVAL = 1000000; + await tfc.setBackend(env.name); }); afterAll(() => jasmine.DEFAULT_TIMEOUT_INTERVAL = originalTimeout); diff --git a/e2e/integration_tests/load_predict_test.ts b/e2e/integration_tests/load_predict_test.ts index 8f07859ad38..a2de61c7664 100644 --- a/e2e/integration_tests/load_predict_test.ts +++ b/e2e/integration_tests/load_predict_test.ts @@ -17,6 +17,7 @@ import '@tensorflow/tfjs-backend-cpu'; import '@tensorflow/tfjs-backend-webgl'; +import '@tensorflow/tfjs-backend-webgpu'; import * as tfconverter from '@tensorflow/tfjs-converter'; import * as tfc from '@tensorflow/tfjs-core'; @@ -34,35 +35,24 @@ import {KARMA_SERVER, SMOKE} from './constants'; * - Make inference using each backends. */ describe(`${SMOKE} load_predict`, () => { - describeWithFlags(`layers_model`, ALL_ENVS, () => { - let model: tfl.LayersModel; - let inputs: tfc.Tensor; - - const expected = [ - -0.003578941337764263, 0.0028922036290168762, -0.002957976423203945, - 0.00955402385443449 - ]; - - beforeAll(async () => { - model = await tfl.loadLayersModel( - `${KARMA_SERVER}/load_predict_data/layers_model/model.json`); - }); - - beforeEach(() => { - inputs = tfc.tensor([86, 11, 62, 40, 36, 75, 82, 94, 67, 75], [1, 10]); - }); - - afterEach(() => { - inputs.dispose(); - }); - + describeWithFlags(`layers_model`, ALL_ENVS, (env) => { it(`predict`, async () => { + await tfc.setBackend(env.name); + const model = await tfl.loadLayersModel( + `${KARMA_SERVER}/load_predict_data/layers_model/model.json`); + const inputs = + tfc.tensor([86, 11, 62, 40, 36, 75, 82, 94, 67, 75], [1, 10]); + const expected = [ + -0.003578941337764263, 0.0028922036290168762, -0.002957976423203945, + 0.00955402385443449 + ]; const result = model.predict(inputs) as tfc.Tensor; tfc.test_util.expectArraysClose(await result.data(), expected); + inputs.dispose(); }); }); - describeWithFlags(`graph_model`, ALL_ENVS, async () => { + describeWithFlags(`graph_model`, ALL_ENVS, async (env) => { let a: tfc.Tensor; const expected = [ @@ -70,7 +60,8 @@ describe(`${SMOKE} load_predict`, () => { -0.18349379301071167 ]; - beforeEach(() => { + beforeEach(async () => { + await tfc.setBackend(env.name); a = tfc.tensor2d([1, 1, 1, 1], [2, 2], 'float32'); }); diff --git a/e2e/integration_tests/memory_leak_test.ts b/e2e/integration_tests/memory_leak_test.ts index aef72ca0cd7..bc849475edd 100644 --- a/e2e/integration_tests/memory_leak_test.ts +++ b/e2e/integration_tests/memory_leak_test.ts @@ -16,6 +16,8 @@ */ import '@tensorflow/tfjs-backend-cpu'; +import '@tensorflow/tfjs-backend-webgl'; +import '@tensorflow/tfjs-backend-webgpu'; import * as tfconverter from '@tensorflow/tfjs-converter'; import * as tfc from '@tensorflow/tfjs-core'; diff --git a/e2e/integration_tests/setup_test.ts b/e2e/integration_tests/setup_test.ts index 9f139083fb8..915dd68e0f9 100644 --- a/e2e/integration_tests/setup_test.ts +++ b/e2e/integration_tests/setup_test.ts @@ -19,9 +19,10 @@ import 
'@tensorflow/tfjs-core'; // tslint:disable-next-line: no-imports-from-dist import '@tensorflow/tfjs-core/dist/public/chained_ops/register_all_chained_ops'; +import '@tensorflow/tfjs-backend-wasm'; + // tslint:disable-next-line: no-imports-from-dist import {parseTestEnvFromKarmaFlags, registerTestEnv, setTestEnvs, TEST_ENVS} from '@tensorflow/tfjs-core/dist/jasmine_util'; -import '@tensorflow/tfjs-backend-wasm'; import {TAGS} from './constants'; @@ -44,6 +45,17 @@ registerTestEnv({ registerTestEnv({name: 'cpu', backendName: 'cpu', isDataSync: true}); +// TODO: Support test windows on WebGPU. Bug: +// https://github.com/tensorflow/tfjs/issues/7616. +if (navigator.platform.toUpperCase().indexOf('MAC') >= 0) { + registerTestEnv({ + name: 'webgpu', + backendName: 'webgpu', + flags: {'WEBGPU_CPU_FORWARD': false}, + isDataSync: true + }); +} + // tslint:disable-next-line:no-any declare let __karma__: any; if (typeof __karma__ !== 'undefined') { diff --git a/e2e/karma.conf.js b/e2e/karma.conf.js index ec8be646e46..3b379d9c713 100644 --- a/e2e/karma.conf.js +++ b/e2e/karma.conf.js @@ -111,6 +111,13 @@ const browserstackConfig = { port: 9876 }; +const chromeWebgpuFlags = [ + '--enable-unsafe-webgpu', // Can be removed after WebGPU release + '--use-webgpu-adapter=swiftshader', + // https://github.com/tensorflow/tfjs/issues/7631 + '--disable-vulkan-fallback-to-gl-for-testing', +]; + module.exports = function(config) { const args = []; @@ -156,7 +163,8 @@ module.exports = function(config) { browser: 'chrome', browser_version: 'latest', os: 'OS X', - os_version: 'High Sierra' + os_version: 'High Sierra', + flags: chromeWebgpuFlags, }, bs_firefox_mac: { base: 'BrowserStack', @@ -191,7 +199,8 @@ module.exports = function(config) { browser: 'chrome', browser_version: '101.0', os: 'Windows', - os_version: '10' + os_version: '10', + flags: chromeWebgpuFlags, } }, client: {jasmine: {random: false}, args: args, captureConsole: true}, diff --git a/e2e/package.json b/e2e/package.json index ac3ce61be79..15f175e72af 100644 --- a/e2e/package.json +++ b/e2e/package.json @@ -13,6 +13,7 @@ "@tensorflow/tfjs-backend-cpu": "link:../link-package/node_modules/@tensorflow/tfjs-backend-cpu", "@tensorflow/tfjs-backend-wasm": "link:../link-package/node_modules/@tensorflow/tfjs-backend-wasm", "@tensorflow/tfjs-backend-webgl": "link:../link-package/node_modules/@tensorflow/tfjs-backend-webgl", + "@tensorflow/tfjs-backend-webgpu": "link:../link-package/node_modules/@tensorflow/tfjs-backend-webgpu", "@tensorflow/tfjs-converter": "link:../link-package/node_modules/@tensorflow/tfjs-converter", "@tensorflow/tfjs-core": "link:../link-package/node_modules/@tensorflow/tfjs-core", "@tensorflow/tfjs-data": "link:../link-package/node_modules/@tensorflow/tfjs-data", diff --git a/e2e/scripts/run-browserstack-tests.sh b/e2e/scripts/run-browserstack-tests.sh index 260de7e08d1..84c774223ea 100755 --- a/e2e/scripts/run-browserstack-tests.sh +++ b/e2e/scripts/run-browserstack-tests.sh @@ -23,7 +23,10 @@ set -e TAGS="#SMOKE,#REGRESSION" TAGS_WITH_GOLDEN="$TAGS,#GOLDEN" -# Test windows 10 with golden model tests +# Test macOS with smoke/regression/golden tests. +COMMANDS+=("yarn run-browserstack --browsers=bs_chrome_mac --tags '$TAGS_WITH_GOLDEN'") + +# Test windows 10 with smoke/regression/golden tests. 
COMMANDS+=("yarn run-browserstack --browsers=win_10_chrome --tags '$TAGS_WITH_GOLDEN'") # Test script tag bundles From db099a493d67b3f7f54e83cb8dd4264df05010c4 Mon Sep 17 00:00:00 2001 From: Linchenn <40653845+Linchenn@users.noreply.github.com> Date: Sat, 6 May 2023 16:51:17 -0700 Subject: [PATCH 05/32] Add Draw API (#7628) FEATURE * add draw * add tests * add comments * lint * upd * typo * cmt * rename contextOption to canvasOption * Update tfjs-core/src/ops/browser.ts Co-authored-by: Matthew Soulanille * Update tfjs-core/src/ops/draw_test.ts Co-authored-by: Matthew Soulanille * warn once * lint * unify options * typo * tune * tune * tune * typo --------- Co-authored-by: Matthew Soulanille --- tfjs-backend-cpu/BUILD.bazel | 2 +- tfjs-backend-cpu/src/kernels/Draw.ts | 96 +++++++++++++++++ tfjs-backend-cpu/src/register_all_kernels.ts | 2 + tfjs-backend-webgl/src/setup_test.ts | 2 +- tfjs-backend-webgpu/src/setup_test.ts | 6 ++ tfjs-core/src/kernel_names.ts | 9 +- tfjs-core/src/ops/browser.ts | 103 ++++++++++++++---- tfjs-core/src/ops/draw_test.ts | 104 +++++++++++++++++++ tfjs-core/src/types.ts | 54 +++++++--- 9 files changed, 340 insertions(+), 38 deletions(-) create mode 100644 tfjs-backend-cpu/src/kernels/Draw.ts create mode 100644 tfjs-core/src/ops/draw_test.ts diff --git a/tfjs-backend-cpu/BUILD.bazel b/tfjs-backend-cpu/BUILD.bazel index 62382ed2684..ece7e27f28d 100644 --- a/tfjs-backend-cpu/BUILD.bazel +++ b/tfjs-backend-cpu/BUILD.bazel @@ -41,7 +41,7 @@ tfjs_web_test( ], headless = False, presubmit_browsers = [ - "bs_safari_mac", + "bs_chrome_mac", ], ) diff --git a/tfjs-backend-cpu/src/kernels/Draw.ts b/tfjs-backend-cpu/src/kernels/Draw.ts new file mode 100644 index 00000000000..b71d7c2c3a5 --- /dev/null +++ b/tfjs-backend-cpu/src/kernels/Draw.ts @@ -0,0 +1,96 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {Draw, DrawAttrs, DrawInputs, KernelConfig, KernelFunc, TypedArray} from '@tensorflow/tfjs-core'; +import {TensorInfo} from '@tensorflow/tfjs-core'; + +import {MathBackendCPU} from '../backend_cpu'; + +export function draw( + args: {inputs: DrawInputs, backend: MathBackendCPU, attrs: DrawAttrs}): + TensorInfo { + const {inputs, backend, attrs} = args; + const {image} = inputs; + const {canvas, options} = attrs; + const {contextOptions, imageOptions} = options || {}; + const alpha = imageOptions ?.alpha || 1; + + const contextType = contextOptions ?.contextType || '2d'; + if (contextType !== '2d') { + throw new Error(`Context type ${ + contextOptions.contextType} is not supported by the CPU backend.`); + } + const ctx = canvas.getContext(contextType, + contextOptions?.contextAttributes || {}) as CanvasRenderingContext2D ; + if (ctx == null) { + throw new Error(`Could not get the context with ${contextType} type.`); + } + + const [height, width] = image.shape.slice(0, 2); + const depth = image.shape.length === 2 ? 
1 : image.shape[2]; + const data = backend.data.get(image.dataId).values as TypedArray; + const multiplier = image.dtype === 'float32' ? 255 : 1; + const bytes = new Uint8ClampedArray(width * height * 4); + + for (let i = 0; i < height * width; ++i) { + const rgba = [0, 0, 0, 255 * alpha]; + + for (let d = 0; d < depth; d++) { + const value = data[i * depth + d]; + + if (image.dtype === 'float32') { + if (value < 0 || value > 1) { + throw new Error( + `Tensor values for a float32 Tensor must be in the ` + + `range [0 - 1] but encountered ${value}.`); + } + } else if (image.dtype === 'int32') { + if (value < 0 || value > 255) { + throw new Error( + `Tensor values for a int32 Tensor must be in the ` + + `range [0 - 255] but encountered ${value}.`); + } + } + + if (depth === 1) { + rgba[0] = value * multiplier; + rgba[1] = value * multiplier; + rgba[2] = value * multiplier; + } else { + rgba[d] = value * multiplier; + } + } + + const j = i * 4; + bytes[j + 0] = Math.round(rgba[0]); + bytes[j + 1] = Math.round(rgba[1]); + bytes[j + 2] = Math.round(rgba[2]); + bytes[j + 3] = Math.round(rgba[3]); + } + + canvas.width = width; + canvas.height = height; + const imageData = new ImageData(bytes, width, height); + ctx.putImageData(imageData, 0, 0); + return image; +} + +export const drawConfig: KernelConfig = { + kernelName: Draw, + backendName: 'cpu', + kernelFunc: draw as unknown as KernelFunc +}; diff --git a/tfjs-backend-cpu/src/register_all_kernels.ts b/tfjs-backend-cpu/src/register_all_kernels.ts index e614e311862..60f61bfd16d 100644 --- a/tfjs-backend-cpu/src/register_all_kernels.ts +++ b/tfjs-backend-cpu/src/register_all_kernels.ts @@ -70,6 +70,7 @@ import {diagConfig} from './kernels/Diag'; import {dilation2DConfig} from './kernels/Dilation2D'; import {dilation2DBackpropFilterConfig} from './kernels/Dilation2DBackpropFilter'; import {dilation2DBackpropInputConfig} from './kernels/Dilation2DBackpropInput'; +import {drawConfig} from './kernels/Draw'; import {einsumConfig} from './kernels/Einsum'; import {eluConfig} from './kernels/Elu'; import {eluGradConfig} from './kernels/EluGrad'; @@ -244,6 +245,7 @@ const kernelConfigs: KernelConfig[] = [ dilation2DConfig, dilation2DBackpropFilterConfig, dilation2DBackpropInputConfig, + drawConfig, einsumConfig, eluConfig, eluGradConfig, diff --git a/tfjs-backend-webgl/src/setup_test.ts b/tfjs-backend-webgl/src/setup_test.ts index 343a8e5e9af..0baca737d62 100644 --- a/tfjs-backend-webgl/src/setup_test.ts +++ b/tfjs-backend-webgl/src/setup_test.ts @@ -36,7 +36,7 @@ const customInclude = (testName: string) => { 'isBrowser: false', 'dilation gradient', 'throws when index is out of bound', // otsu tests for threshold op is failing on windows - 'method otsu' + 'method otsu', 'Draw on 2d context' ]; for (const subStr of toExclude) { if (testName.includes(subStr)) { diff --git a/tfjs-backend-webgpu/src/setup_test.ts b/tfjs-backend-webgpu/src/setup_test.ts index 83917808a3b..9095d6f30ce 100644 --- a/tfjs-backend-webgpu/src/setup_test.ts +++ b/tfjs-backend-webgpu/src/setup_test.ts @@ -124,6 +124,12 @@ const TEST_FILTERS: TestFilter[] = [ 'canvas and image match', // Failing on Linux ], }, + { + startsWith: 'Draw', + excludes: [ + 'on 2d context', + ] + }, { startsWith: 'sign ', excludes: [ diff --git a/tfjs-core/src/kernel_names.ts b/tfjs-core/src/kernel_names.ts index 021b6d52afa..10e7d7d1a26 100644 --- a/tfjs-core/src/kernel_names.ts +++ b/tfjs-core/src/kernel_names.ts @@ -22,7 +22,7 @@ import {NamedTensorInfoMap} from './kernel_registry'; import 
{ExplicitPadding} from './ops/conv_util'; import {Activation} from './ops/fused_types'; import {TensorInfo} from './tensor_info'; -import {DataType, PixelData} from './types'; +import {DataType, DrawOptions, PixelData} from './types'; export const Abs = 'Abs'; export type AbsInputs = UnaryInputs; @@ -335,6 +335,13 @@ export const Dilation2DBackpropFilter = 'Dilation2DBackpropFilter'; export type Dilation2DBackpropFilterInputs = Pick; +export const Draw = 'Draw'; +export type DrawInputs = Pick; +export interface DrawAttrs { + canvas: HTMLCanvasElement; + options?: DrawOptions; +} + export const RealDiv = 'RealDiv'; export type RealDivInputs = BinaryInputs; diff --git a/tfjs-core/src/ops/browser.ts b/tfjs-core/src/ops/browser.ts index df33b06518e..45141038ca2 100644 --- a/tfjs-core/src/ops/browser.ts +++ b/tfjs-core/src/ops/browser.ts @@ -17,18 +17,19 @@ import {ENGINE} from '../engine'; import {env} from '../environment'; -import {FromPixels, FromPixelsAttrs, FromPixelsInputs} from '../kernel_names'; +import {Draw, DrawAttrs, DrawInputs, FromPixels, FromPixelsAttrs, FromPixelsInputs} from '../kernel_names'; import {getKernel, NamedAttrMap} from '../kernel_registry'; import {Tensor, Tensor2D, Tensor3D} from '../tensor'; import {NamedTensorMap} from '../tensor_types'; import {convertToTensor} from '../tensor_util_env'; -import {PixelData, TensorLike} from '../types'; +import {DrawOptions, ImageOptions, PixelData, TensorLike} from '../types'; import {cast} from './cast'; import {op} from './operation'; import {tensor3d} from './tensor3d'; let fromPixels2DContext: CanvasRenderingContext2D; +let hasToPixelsWarned = false; /** * Creates a `tf.Tensor` from an image. @@ -145,9 +146,8 @@ function fromPixels_( 'Reason: OffscreenCanvas Context2D rendering is not supported.'); } } else { - fromPixels2DContext = - document.createElement('canvas').getContext( - '2d', {willReadFrequently: true}); + fromPixels2DContext = document.createElement('canvas').getContext( + '2d', {willReadFrequently: true}); } } fromPixels2DContext.canvas.width = width; @@ -269,6 +269,33 @@ export async function fromPixelsAsync( return fromPixels_(inputs, numChannels); } +function validateImgTensor(img: Tensor2D|Tensor3D) { + if (img.rank !== 2 && img.rank !== 3) { + throw new Error( + `toPixels only supports rank 2 or 3 tensors, got rank ${img.rank}.`); + } + const depth = img.rank === 2 ? 1 : img.shape[2]; + + if (depth > 4 || depth === 2) { + throw new Error( + `toPixels only supports depth of size ` + + `1, 3 or 4 but got ${depth}`); + } + + if (img.dtype !== 'float32' && img.dtype !== 'int32') { + throw new Error( + `Unsupported type for toPixels: ${img.dtype}.` + + ` Please use float32 or int32 tensors.`); + } +} + +function validateImageOptions(imageOptions: ImageOptions) { + const alpha = imageOptions ?.alpha || 1; + if (alpha > 1 || alpha < 0) { + throw new Error(`Alpha value ${alpha} is suppoed to be in range [0 - 1].`); + } +} + /** * Draws a `tf.Tensor` of pixel values to a byte array or optionally a * canvas. @@ -299,25 +326,10 @@ export async function toPixels( $img = cast(originalImgTensor, 'int32'); originalImgTensor.dispose(); } - if ($img.rank !== 2 && $img.rank !== 3) { - throw new Error( - `toPixels only supports rank 2 or 3 tensors, got rank ${$img.rank}.`); - } + validateImgTensor($img); + const [height, width] = $img.shape.slice(0, 2); const depth = $img.rank === 2 ? 
1 : $img.shape[2]; - - if (depth > 4 || depth === 2) { - throw new Error( - `toPixels only supports depth of size ` + - `1, 3 or 4 but got ${depth}`); - } - - if ($img.dtype !== 'float32' && $img.dtype !== 'int32') { - throw new Error( - `Unsupported type for toPixels: ${$img.dtype}.` + - ` Please use float32 or int32 tensors.`); - } - const data = await $img.data(); const multiplier = $img.dtype === 'float32' ? 255 : 1; const bytes = new Uint8ClampedArray(width * height * 4); @@ -359,6 +371,13 @@ export async function toPixels( } if (canvas != null) { + if (!hasToPixelsWarned) { + console.warn( + 'tf.browser.toPixels is not efficient to draw tensor on canvas. ' + + 'Please try tf.browser.draw instead.'); + hasToPixelsWarned = true; + } + canvas.width = width; canvas.height = height; const ctx = canvas.getContext('2d'); @@ -371,4 +390,44 @@ export async function toPixels( return bytes; } +/** + * Draws a `tf.Tensor` to a canvas. + * + * When the dtype of the input is 'float32', we assume values in the range + * [0-1]. Otherwise, when input is 'int32', we assume values in the range + * [0-255]. + * + * @param image The tensor to draw on the canvas. Must match one of + * these shapes: + * - Rank-2 with shape `[height, width`]: Drawn as grayscale. + * - Rank-3 with shape `[height, width, 1]`: Drawn as grayscale. + * - Rank-3 with shape `[height, width, 3]`: Drawn as RGB with alpha set in + * `imageOptions` (defaults to 1, which is opaque). + * - Rank-3 with shape `[height, width, 4]`: Drawn as RGBA. + * @param canvas The canvas to draw to. + * @param options The configuration arguments for image to be drawn and the + * canvas to draw to. + * + * @doc {heading: 'Browser', namespace: 'browser'} + */ +export function draw( + image: Tensor2D|Tensor3D|TensorLike, canvas: HTMLCanvasElement, + options?: DrawOptions): void { + let $img = convertToTensor(image, 'img', 'draw'); + if (!(image instanceof Tensor)) { + // Assume int32 if user passed a native array. + const originalImgTensor = $img; + $img = cast(originalImgTensor, 'int32'); + originalImgTensor.dispose(); + } + validateImgTensor($img); + validateImageOptions(options?.imageOptions); + + const inputs: DrawInputs = {image: $img}; + const attrs: DrawAttrs = {canvas, options}; + ENGINE.runKernel( + Draw, inputs as unknown as NamedTensorMap, + attrs as unknown as NamedAttrMap); +} + export const fromPixels = /* @__PURE__ */ op({fromPixels_}); diff --git a/tfjs-core/src/ops/draw_test.ts b/tfjs-core/src/ops/draw_test.ts new file mode 100644 index 00000000000..0b72254e180 --- /dev/null +++ b/tfjs-core/src/ops/draw_test.ts @@ -0,0 +1,104 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ + +import * as tf from '../index'; +import {BROWSER_ENVS, describeWithFlags} from '../jasmine_util'; +import {expectArraysClose, expectArraysEqual} from '../test_util'; + +class MockContext { + data: ImageData; + + getImageData() { + return this.data; + } + + putImageData(data: ImageData, x: number, y: number) { + this.data = data; + } +} + +class MockCanvas { + context: MockContext; + + constructor(public width: number, public height: number) {} + + getContext(type: '2d'): MockContext { + if (this.context == null) { + this.context = new MockContext(); + } + return this.context; + } +} + +describeWithFlags('Draw on 2d context', BROWSER_ENVS, () => { + it('draw image with 4 channels and int values', async () => { + const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + const img = tf.tensor3d(data, [2, 2, 4], 'int32'); + const canvas = new MockCanvas(2, 2); + const ctx = canvas.getContext('2d'); + + // tslint:disable-next-line:no-any + tf.browser.draw(img, canvas as any, {contextOptions: {contextType: '2d'}}); + expectArraysEqual(ctx.getImageData().data, data); + }); + + it('draw image with 4 channels and float values', async () => { + const data = + [.1, .2, .3, .4, .5, .6, .7, .8, .9, .1, .11, .12, .13, .14, .15, .16]; + const img = tf.tensor3d(data, [2, 2, 4]); + const canvas = new MockCanvas(2, 2); + const ctx = canvas.getContext('2d'); + + // tslint:disable-next-line:no-any + tf.browser.draw(img, canvas as any, {contextOptions: {contextType: '2d'}}); + const actualData = ctx.getImageData().data; + const expectedData = data.map(e => Math.round(e * 255)); + expectArraysClose(actualData, expectedData, 1); + }); + + it('draw 2D image in grayscale', async () => { + const data = [1, 2, 3, 4]; + const img = tf.tensor2d(data, [2, 2], 'int32'); + const canvas = new MockCanvas(2, 2); + const ctx = canvas.getContext('2d'); + + // tslint:disable-next-line:no-any + tf.browser.draw(img, canvas as any, {contextOptions: {contextType: '2d'}}); + const actualData = ctx.getImageData().data; + const expectedData = + [1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255, 4, 4, 4, 255]; + expectArraysEqual(actualData, expectedData); + }); + + it('draw image with alpha=0.5', async () => { + const data = [1, 2, 3, 4]; + const img = tf.tensor3d(data, [2, 2, 1], 'int32'); + const canvas = new MockCanvas(2, 2); + const ctx = canvas.getContext('2d'); + + const drawOptions = { + contextOptions: {contextType: '2d'}, + imageOptions: {alpha: 0.5} + }; + // tslint:disable-next-line:no-any + tf.browser.draw(img, canvas as any, drawOptions); + const actualData = ctx.getImageData().data; + const expectedData = + [1, 1, 1, 128, 2, 2, 2, 128, 3, 3, 3, 128, 4, 4, 4, 128]; + expectArraysEqual(actualData, expectedData); + }); +}); diff --git a/tfjs-core/src/types.ts b/tfjs-core/src/types.ts index a31b60a013a..994e447ba9a 100644 --- a/tfjs-core/src/types.ts +++ b/tfjs-core/src/types.ts @@ -57,10 +57,9 @@ export interface SingleValueMap { export type DataType = keyof DataTypeMap; export type NumericDataType = 'float32'|'int32'|'bool'|'complex64'; -export type DataTypeFor = - T extends number | boolean ? NumericDataType : - T extends string ? 'string' : - never; +export type DataTypeFor = + T extends number|boolean ? NumericDataType : T extends string ? 'string' : + never; export type TypedArray = Float32Array|Int32Array|Uint8Array; /** Tensor data used in tensor creation and user-facing API. 
*/ @@ -204,15 +203,44 @@ export interface WebGPUData { } export function isWebGLData(values: unknown): values is WebGLData { - return values != null - && typeof values === 'object' - && 'texture' in values - && values.texture instanceof WebGLTexture; + return values != null && typeof values === 'object' && 'texture' in values && + values.texture instanceof WebGLTexture; } export function isWebGPUData(values: unknown): values is WebGPUData { - return typeof GPUBuffer !== 'undefined' - && values != null - && typeof values === 'object' - && 'buffer' in values - && values.buffer instanceof GPUBuffer; + return typeof GPUBuffer !== 'undefined' && values != null && + typeof values === 'object' && 'buffer' in values && + values.buffer instanceof GPUBuffer; +} + +export interface ImageOptions { + /** + * Optional. A number in range [0-1]. If the image is a 2D tensor or a 3D + * tensor with 1 or 3 channels, the alpha channels would set as its value; + * otherwise, it would not make effects. + */ + alpha?: number; +} + +export interface ContextOptions { + /** + * Optional. If the canvas has created a context, it would not make effects. + * If it is not set, it would be variable based on the current backend. + */ + contextType?: string; + /** + * Optional. A WebGLContextAttributes configuration. If the canvas has created + * a context, it would not make effects. + */ + contextAttributes?: WebGLContextAttributes; +} + +export interface DrawOptions { + /** + * Optional. An object of options to customize the values of image tensor. + */ + imageOptions?: ImageOptions; + /** + * Optional. An object to configure the context of the canvas to draw to. + */ + contextOptions?: ContextOptions; } From 6549f583c6ea7737b8139f9663f434a97d31936a Mon Sep 17 00:00:00 2001 From: xhcao Date: Mon, 8 May 2023 16:38:17 +0800 Subject: [PATCH 06/32] [WebGPU] Throw a warning for readSync (#7668) * [WebGPU] Throw a warning for readSync --- tfjs-backend-webgpu/src/backend_webgpu.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index ac68025d57e..7ed5d1b1b7d 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -123,6 +123,7 @@ export class WebGPUBackend extends KernelBackend { private supportTimeQuery: boolean; private uniformPendingDisposal: GPUBuffer[] = []; private uploadWaitMs = 0; + private hasReadSyncWarned = false; private nextDataId(): number { return WebGPUBackend.nextDataId++; @@ -387,6 +388,13 @@ export class WebGPUBackend extends KernelBackend { } override readSync(dataId: object): BackendValues { + if (!this.hasReadSyncWarned) { + this.hasReadSyncWarned = true; + console.warn( + `The performance of synchronously reading data from GPU to CPU is ` + + `poor on the webgpu backend, please use asynchronous APIs instead.`); + } + const tensorData = this.tensorMap.get(dataId); const {values, complexTensorInfos} = tensorData; From 26f4aa2a08d321841562a9d326fb54d6eb5eb48d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 09:15:42 -0700 Subject: [PATCH 07/32] build(deps): bump engine.io from 6.4.1 to 6.4.2 in /tfjs-vis (#7650) Bumps [engine.io](https://github.com/socketio/engine.io) from 6.4.1 to 6.4.2. 
- [Release notes](https://github.com/socketio/engine.io/releases) - [Changelog](https://github.com/socketio/engine.io/blob/main/CHANGELOG.md) - [Commits](https://github.com/socketio/engine.io/compare/6.4.1...6.4.2) --- updated-dependencies: - dependency-name: engine.io dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- tfjs-vis/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tfjs-vis/yarn.lock b/tfjs-vis/yarn.lock index d779df7720d..c1833a0ef4d 100644 --- a/tfjs-vis/yarn.lock +++ b/tfjs-vis/yarn.lock @@ -2977,9 +2977,9 @@ engine.io-parser@~5.0.3: integrity sha512-tjuoZDMAdEhVnSFleYPCtdL2GXwVTGtNjoeJd9IhIG3C1xs9uwxqRNEu5WpnDZCaozwVlK/nuQhpodhXSIMaxw== engine.io@~6.4.1: - version "6.4.1" - resolved "https://registry.yarnpkg.com/engine.io/-/engine.io-6.4.1.tgz#8056b4526a88e779f9c280d820422d4e3eeaaae5" - integrity sha512-JFYQurD/nbsA5BSPmbaOSLa3tSVj8L6o4srSwXXY3NqE+gGUNmmPTbhn8tjzcCtSqhFgIeqef81ngny8JM25hw== + version "6.4.2" + resolved "https://registry.yarnpkg.com/engine.io/-/engine.io-6.4.2.tgz#ffeaf68f69b1364b0286badddf15ff633476473f" + integrity sha512-FKn/3oMiJjrOEOeUub2WCox6JhxBXq/Zn3fZOMCBxKnNYtsdKjxhl7yR3fZhM9PV+rdE75SU5SYMc+2PGzo+Tg== dependencies: "@types/cookie" "^0.4.1" "@types/cors" "^2.8.12" From 5a262525ad38c0df9c45221f07a29406f73b5bbd Mon Sep 17 00:00:00 2001 From: Matthew Soulanille Date: Mon, 8 May 2023 10:17:10 -0700 Subject: [PATCH 08/32] Move WebGPU out of alpha (#7643) Move WebGPU out of alpha and version it with the rest of the TFJS packages. --- scripts/release-util.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/release-util.ts b/scripts/release-util.ts index fe245d2940b..8b074a5aceb 100755 --- a/scripts/release-util.ts +++ b/scripts/release-util.ts @@ -153,8 +153,8 @@ export const E2E_PHASE: Phase = { export const TFJS_RELEASE_UNIT: ReleaseUnit = { name: 'tfjs', phases: [ - CORE_PHASE, CPU_PHASE, WEBGL_PHASE, LAYERS_CONVERTER_PHASE, DATA_PHASE, - UNION_PHASE, NODE_PHASE, WASM_PHASE + CORE_PHASE, CPU_PHASE, WEBGL_PHASE, WEBGPU_PHASE, LAYERS_CONVERTER_PHASE, + DATA_PHASE, UNION_PHASE, NODE_PHASE, WASM_PHASE ] }; @@ -167,7 +167,7 @@ export const TFJS_RELEASE_UNIT: ReleaseUnit = { // replace 'link' dependencies with the new monorepo version. 
export const ALPHA_RELEASE_UNIT: ReleaseUnit = { name: 'alpha-monorepo-packages', - phases: [TFDF_PHASE, WEBGPU_PHASE], + phases: [TFDF_PHASE], }; export const VIS_RELEASE_UNIT: ReleaseUnit = { From ea1ece16e55a71d8a1a0b03f98e910682017769e Mon Sep 17 00:00:00 2001 From: fengwuyao <131706622+fengwuyao@users.noreply.github.com> Date: Mon, 8 May 2023 12:12:18 -0700 Subject: [PATCH 09/32] add wasm backend for bitwiseAnd API (#7661) --- tfjs-backend-wasm/src/cc/BUILD.bazel | 11 ++++ .../src/cc/kernels/BitwiseAnd.cc | 50 +++++++++++++++++++ tfjs-backend-wasm/src/kernels/BitwiseAnd.ts | 26 ++++++++++ tfjs-backend-wasm/src/register_all_kernels.ts | 2 + tfjs-backend-wasm/src/setup_test.ts | 7 +-- 5 files changed, 90 insertions(+), 6 deletions(-) create mode 100644 tfjs-backend-wasm/src/cc/kernels/BitwiseAnd.cc create mode 100644 tfjs-backend-wasm/src/kernels/BitwiseAnd.ts diff --git a/tfjs-backend-wasm/src/cc/BUILD.bazel b/tfjs-backend-wasm/src/cc/BUILD.bazel index 8aaf7998439..24567f34cc5 100644 --- a/tfjs-backend-wasm/src/cc/BUILD.bazel +++ b/tfjs-backend-wasm/src/cc/BUILD.bazel @@ -359,6 +359,7 @@ tfjs_cc_library( ":AvgPool3DGrad", ":BatchMatMul", ":Bincount", + ":BitwiseAnd", ":Ceil", ":ClipByValue", ":Conv2D", @@ -648,6 +649,16 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "BitwiseAnd", + srcs = ["kernels/BitwiseAnd.cc"], + deps = [ + ":backend", + ":binary", + ":util", + ], +) + tfjs_cc_library( name = "_FusedMatMul", srcs = ["kernels/_FusedMatMul.cc"], diff --git a/tfjs-backend-wasm/src/cc/kernels/BitwiseAnd.cc b/tfjs-backend-wasm/src/cc/kernels/BitwiseAnd.cc new file mode 100644 index 00000000000..d73ccaee2a6 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/BitwiseAnd.cc @@ -0,0 +1,50 @@ +/* Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ +#ifdef __EMSCRIPTEN__ +#include +#endif +#include + +#include "tfjs-backend-wasm/src/cc/binary.h" +#include "tfjs-backend-wasm/src/cc/util.h" + +namespace { +template +inline T BitwiseAndImp(T a, T b) { + return a & b; +} +} // namespace + +namespace tfjs { +namespace wasm { +// We use C-style API to interface with Javascript. 
+extern "C" { +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif +// REQUIRES: +// - Tensor `a` and `b` must have dtype int32 (checked in tfjs-core) +// - Tensor `a` and `b` must have the same shape (checked in tfjs-core) +void BitwiseAnd(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const DType dtype, const size_t out_id) { + binary_i32(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, + out_id, + BitwiseAndImp); // input numbers are ensured to be int32 +} + +} // extern "C" +} // namespace wasm +} // namespace tfjs diff --git a/tfjs-backend-wasm/src/kernels/BitwiseAnd.ts b/tfjs-backend-wasm/src/kernels/BitwiseAnd.ts new file mode 100644 index 00000000000..a761c435c5f --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/BitwiseAnd.ts @@ -0,0 +1,26 @@ + +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {BitwiseAnd, KernelConfig} from '@tensorflow/tfjs-core'; + +import {createBinaryKernelConfig} from './binary_kernel'; + +const supportsFullBroadcast = true; + +export const bitwiseAndConfig: KernelConfig = + createBinaryKernelConfig(BitwiseAnd, supportsFullBroadcast); diff --git a/tfjs-backend-wasm/src/register_all_kernels.ts b/tfjs-backend-wasm/src/register_all_kernels.ts index 561e6e6d63c..80ab27351b3 100644 --- a/tfjs-backend-wasm/src/register_all_kernels.ts +++ b/tfjs-backend-wasm/src/register_all_kernels.ts @@ -40,6 +40,7 @@ import {avgPool3DGradConfig} from './kernels/AvgPool3DGrad'; import {batchMatMulConfig} from './kernels/BatchMatMul'; import {batchToSpaceNDConfig} from './kernels/BatchToSpaceND'; import {bincountConfig} from './kernels/Bincount'; +import {bitwiseAndConfig} from './kernels/BitwiseAnd'; import {broadcastArgsConfig} from './kernels/BroadcastArgs'; import {castConfig} from './kernels/Cast'; import {ceilConfig} from './kernels/Ceil'; @@ -193,6 +194,7 @@ const kernelConfigs: KernelConfig[] = [ batchMatMulConfig, batchToSpaceNDConfig, bincountConfig, + bitwiseAndConfig, broadcastArgsConfig, castConfig, ceilConfig, diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 021ad8eb5e0..7f29d7acb87 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -290,12 +290,6 @@ const TEST_FILTERS: TestFilter[] = [ {startsWith: 'logicalNot '}, {startsWith: 'logicalOr '}, {startsWith: 'logicalXor '}, - { - startsWith: 'bitwiseAnd', - excludes: [ - 'bitwiseAnd', - ] - }, { startsWith: 'tile ', excludes: [ @@ -396,6 +390,7 @@ const TEST_FILTERS: TestFilter[] = [ {include: 'asinh '}, {include: 'diag '}, {include: 'denseBincount '}, + {include: 'bitwiseAnd'}, {include: 'broadcastArgs '}, {include: 'searchSorted '}, {include: 'avgPool3d '}, From f2b37e55883c474149082f1b1b30108ca0df4af0 Mon Sep 17 00:00:00 2001 From: chunnienc 
<121328115+chunnienc@users.noreply.github.com> Date: Tue, 9 May 2023 15:07:25 -0700 Subject: [PATCH 10/32] [wasm] Add Sinh kernel (#7671) * Added sinh kernel * Updated the files * Install bazel * remove the typo * Updated the package.json file' * update on package.json file * Updated the buildifier * Added sinh in the file * Updated WORKSPACE * Updated the files * Updated * Updated the files * Updated yarn.lock * Updated yarn.lock file * minor fixes with c_ccp_properties.json files * minor fixes with c_ccp_properties.json files * Rollback vscode settings * Fix implemetation * Fix * Update license header * Rollback vscode config * Update header --------- Co-authored-by: Kartikey Co-authored-by: Kartikey Rawat <41143496+carrycooldude@users.noreply.github.com> Co-authored-by: Ping Yu <4018+pyu10055@users.noreply.github.com> --- .vscode/settings.json | 2 +- tfjs-backend-wasm/src/cc/BUILD.bazel | 10 ++++ tfjs-backend-wasm/src/cc/kernels/Sinh.cc | 58 +++++++++++++++++++ tfjs-backend-wasm/src/kernels/Sinh.ts | 21 +++++++ tfjs-backend-wasm/src/register_all_kernels.ts | 2 + tfjs-backend-wasm/src/setup_test.ts | 1 + 6 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 tfjs-backend-wasm/src/cc/kernels/Sinh.cc create mode 100644 tfjs-backend-wasm/src/kernels/Sinh.ts diff --git a/.vscode/settings.json b/.vscode/settings.json index 00f147af1f3..4ad1d9a65b1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,7 +12,7 @@ "**/bazel-out/**/*": true, "**/bazel-genfiles/**/*": true, "**/bazel-testlogs/**/*": true, - "**/bazel-tfjs*/**/*": true, + "**/bazel-tfjs*/**/*": true }, "files.trimTrailingWhitespace": true, "editor.tabSize": 2, diff --git a/tfjs-backend-wasm/src/cc/BUILD.bazel b/tfjs-backend-wasm/src/cc/BUILD.bazel index 24567f34cc5..347a3934fa4 100644 --- a/tfjs-backend-wasm/src/cc/BUILD.bazel +++ b/tfjs-backend-wasm/src/cc/BUILD.bazel @@ -436,6 +436,7 @@ tfjs_cc_library( ":Sigmoid", ":Sign", ":Sin", + ":Sinh", ":Softmax", ":Softplus", ":SparseFillEmptyRows", @@ -1524,6 +1525,15 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "Sinh", + srcs = ["kernels/Sinh.cc"], + deps = [ + ":unary", + ":util", + ], +) + tfjs_cc_library( name = "Softmax", srcs = ["kernels/Softmax.cc"], diff --git a/tfjs-backend-wasm/src/cc/kernels/Sinh.cc b/tfjs-backend-wasm/src/cc/kernels/Sinh.cc new file mode 100644 index 00000000000..dff6d07faad --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/Sinh.cc @@ -0,0 +1,58 @@ +/* Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include + +#include "tfjs-backend-wasm/src/cc/unary.h" +#include "tfjs-backend-wasm/src/cc/util.h" + +namespace tfjs { +namespace wasm { + +namespace { + +template +inline T SinhImpl(T x) { + return static_cast(std::sinhf(static_cast(x))); +} + +} // namespace + +// We use C-style API to interface with Javascript. 
+extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif + +void Sinh(const int x_id, const DType dtype, const int out_id) { + switch (dtype) { + case DType::float32: + unary_f32(x_id, out_id, SinhImpl); + break; + case DType::int32: + unary_i32(x_id, out_id, SinhImpl); + break; + default: + util::warn("Sinh for tensor id %d failed. Unsupported dtype %d", x_id, + dtype); + } +} + +} // extern "C" +} // namespace wasm +} // namespace tfjs diff --git a/tfjs-backend-wasm/src/kernels/Sinh.ts b/tfjs-backend-wasm/src/kernels/Sinh.ts new file mode 100644 index 00000000000..1506965e3ea --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/Sinh.ts @@ -0,0 +1,21 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ +import {KernelConfig, Sinh} from '@tensorflow/tfjs-core'; + +import {createUnaryKernelConfig} from './unary_kernel'; + +export const sinhConfig: KernelConfig = createUnaryKernelConfig(Sinh); diff --git a/tfjs-backend-wasm/src/register_all_kernels.ts b/tfjs-backend-wasm/src/register_all_kernels.ts index 80ab27351b3..303f315d182 100644 --- a/tfjs-backend-wasm/src/register_all_kernels.ts +++ b/tfjs-backend-wasm/src/register_all_kernels.ts @@ -140,6 +140,7 @@ import {seluConfig} from './kernels/Selu'; import {sigmoidConfig} from './kernels/Sigmoid'; import {signConfig} from './kernels/Sign'; import {sinConfig} from './kernels/Sin'; +import {sinhConfig} from './kernels/Sinh'; import {sliceConfig} from './kernels/Slice'; import {softmaxConfig} from './kernels/Softmax'; import {softplusConfig} from './kernels/Softplus'; @@ -294,6 +295,7 @@ const kernelConfigs: KernelConfig[] = [ sigmoidConfig, signConfig, sinConfig, + sinhConfig, sliceConfig, softmaxConfig, softplusConfig, diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 7f29d7acb87..4fdcac5decd 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -298,6 +298,7 @@ const TEST_FILTERS: TestFilter[] = [ ] }, {startsWith: 'sin '}, + {startsWith: 'sinh '}, { startsWith: 'cos ', excludes: [ From ab7717d9eae7f32759d1fcd68bfa0c61f980c340 Mon Sep 17 00:00:00 2001 From: Jiajia Qin Date: Wed, 10 May 2023 15:01:17 +0800 Subject: [PATCH 11/32] webgpu: Optimize depthwise conv2d (#7660) * webgpu: Optimize depthwise conv2d This PR uses linear workgroup size to optimize depthwise conv2d. See 150% improvement for the time of FusedDepthwiseConv2D on BlazePoseDetector. 
* use size to do checking * fix boundary detection * use virtual outputshape * nits --- .../src/depthwise_conv2d_vec4_webgpu.ts | 37 +++++++++++++------ .../src/kernels/DepthwiseConv2dNative.ts | 1 + .../src/kernels/FusedDepthwiseConv2D.ts | 1 + 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/tfjs-backend-webgpu/src/depthwise_conv2d_vec4_webgpu.ts b/tfjs-backend-webgpu/src/depthwise_conv2d_vec4_webgpu.ts index d5b310d3ecf..70ed0645b05 100644 --- a/tfjs-backend-webgpu/src/depthwise_conv2d_vec4_webgpu.ts +++ b/tfjs-backend-webgpu/src/depthwise_conv2d_vec4_webgpu.ts @@ -18,31 +18,39 @@ import {backend_util, util} from '@tensorflow/tfjs-core'; import {activationFnSnippet, biasActivationSnippet} from './activation_util'; import {getMainHeaderString as main, WebGPUProgram} from './webgpu_program'; -import {computeDispatch} from './webgpu_util'; +import {computeDispatch, flatDispatchLayout} from './webgpu_util'; export class DepthwiseConv2DVec4Program implements WebGPUProgram { outputShape: number[]; shaderKey: string; - dispatchLayout: {x: number[], y: number[], z: number[]}; + dispatchLayout: {x: number[]}; dispatch: [number, number, number]; variableNames = ['x', 'W']; - uniforms = 'pads : vec2, inDims : vec2,'; - workgroupSize: [number, number, number] = [4, 4, 4]; + uniforms = 'pads : vec2, inDims : vec2, virtualWidth : i32,'; + workgroupSize: [number, number, number] = [64, 1, 1]; workPerThread = 4; convInfo: backend_util.Conv2DInfo; addBias: boolean; activation: backend_util.Activation; hasPreluActivation: boolean; outputComponent = 4; + virtualWidth: number; constructor( convInfo: backend_util.Conv2DInfo, addBias = false, activation: backend_util.Activation = null, hasPreluActivation = false) { this.outputShape = convInfo.outShape; - this.dispatchLayout = {x: [3], y: [2], z: [0, 1]}; + this.virtualWidth = Math.ceil(this.outputShape[2] / this.workPerThread) * + this.workPerThread; + const virtualOutputShape = [ + this.outputShape[0], this.outputShape[1], this.virtualWidth, + this.outputShape[3] + ]; + this.dispatchLayout = flatDispatchLayout(virtualOutputShape); + this.dispatch = computeDispatch( - this.dispatchLayout, this.outputShape, this.workgroupSize, - [4, this.workPerThread, 1]); + this.dispatchLayout, virtualOutputShape, this.workgroupSize, + [this.outputComponent * this.workPerThread, 1, 1]); util.assert( convInfo.dataFormat === 'channelsLast', @@ -82,11 +90,16 @@ export class DepthwiseConv2DVec4Program implements WebGPUProgram { return value; } - ${main()} { - let batch = i32(globalId.z) / uniforms.outShape[1]; - let r = i32(globalId.z) % uniforms.outShape[1]; - let c = i32(globalId.y) * ${this.workPerThread}; - let d1 = i32(globalId.x) * 4; + ${main('index')} { + let width0 = uniforms.outShape[3] / ${this.outputComponent}; + let d1 = (index % width0) * ${this.outputComponent}; + var index1 = index / width0; + let width1 = uniforms.virtualWidth / ${this.workPerThread}; + let c = (index1 % width1) * ${this.workPerThread}; + index1 = index1 / width1; + let r = index1 % uniforms.outShape[1]; + let batch = index1 / uniforms.outShape[1]; + let xRCCorner = vec2(r, c) * vec2(${strideHeight}, ${ strideWidth}) - uniforms.pads; diff --git a/tfjs-backend-webgpu/src/kernels/DepthwiseConv2dNative.ts b/tfjs-backend-webgpu/src/kernels/DepthwiseConv2dNative.ts index 52ada5ee093..28fa2226bc0 100644 --- a/tfjs-backend-webgpu/src/kernels/DepthwiseConv2dNative.ts +++ b/tfjs-backend-webgpu/src/kernels/DepthwiseConv2dNative.ts @@ -61,6 +61,7 @@ export function 
depthwiseConv2dNative(args: { convInfo.dilationHeight === 1 && convInfo.dilationWidth === 1 && convInfo.inChannels % 4 === 0) { program = new DepthwiseConv2DVec4Program(convInfo); + dimensions.push({type: 'int32', data: [program.virtualWidth]}); } else { program = new DepthwiseConv2DProgram(convInfo); dimensions.push( diff --git a/tfjs-backend-webgpu/src/kernels/FusedDepthwiseConv2D.ts b/tfjs-backend-webgpu/src/kernels/FusedDepthwiseConv2D.ts index 8382d8826ff..9660d1367ea 100644 --- a/tfjs-backend-webgpu/src/kernels/FusedDepthwiseConv2D.ts +++ b/tfjs-backend-webgpu/src/kernels/FusedDepthwiseConv2D.ts @@ -71,6 +71,7 @@ export function fusedDepthwiseConv2D(args: { convInfo.inChannels % 4 === 0) { program = new DepthwiseConv2DVec4Program( convInfo, hasBias, activation, hasPreluActivationWeights); + dimensions.push({type: 'int32', data: [program.virtualWidth]}); } else { program = new DepthwiseConv2DProgram( convInfo, hasBias, activation, hasPreluActivationWeights); From bf454a86c4bbe47c2182fec6ed77aa2975a5c32c Mon Sep 17 00:00:00 2001 From: Jiajia Qin Date: Wed, 10 May 2023 16:31:16 +0800 Subject: [PATCH 12/32] webgpu: Return true for pending disposal tensors in disposeData (#7634) * webgpu: Fix potential tensor leak issue. * simplify code * address comments * address comments * return true for pending memory * nits * nits --------- Co-authored-by: Yang Gu --- tfjs-backend-webgpu/src/backend_webgpu.ts | 35 ++++++++++--------- .../src/backend_webgpu_test.ts | 2 +- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index 7ed5d1b1b7d..4307813e1af 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -178,35 +178,36 @@ export class WebGPUBackend extends KernelBackend { /** * Dispose the memory if the dataId has 0 refCount. Return true if the memory - * is released or memory is not managed in this backend, false if memory is - * not cleared. + * is released or delayed in this backend, false if there are still + * references. * @param dataId * @oaram force Optional, remove the data regardless of refCount */ override disposeData(dataId: DataId, force = false): boolean { - if (this.tensorDataPendingDisposal.indexOf(dataId) >= 0) { - return false; - } + // No-op if already disposed. 
if (!this.tensorMap.has(dataId)) { return true; } const tensorData = this.tensorMap.get(dataId); - this.decRef(dataId); - if (!force && tensorData.refCount > 0) { - return false; + if (force) { + tensorData.refCount = 0; + } else { + tensorData.refCount--; } - // complex is never in commandQueueOwnedIds - if (this.commandQueueOwnedIds.has(dataId)) { - this.tensorDataPendingDisposal.push(dataId); + if (tensorData.refCount > 0) { return false; } - const {complexTensorInfos} = this.tensorMap.get(dataId); - if (complexTensorInfos != null) { - this.disposeData(complexTensorInfos.real.dataId, force); - this.disposeData(complexTensorInfos.imag.dataId, force); + if (tensorData.complexTensorInfos != null) { + this.disposeData(tensorData.complexTensorInfos.real.dataId); + this.disposeData(tensorData.complexTensorInfos.imag.dataId); + } + + if (this.commandQueueOwnedIds.has(dataId)) { + this.tensorDataPendingDisposal.push(dataId); + return true; } this.releaseResource(dataId); @@ -223,7 +224,7 @@ export class WebGPUBackend extends KernelBackend { } as WebGPUMemoryInfo; } - releaseResource(dataId: DataId) { + private releaseResource(dataId: DataId) { const tensorData = this.tensorMap.get(dataId); if (!tensorData || !tensorData.resource) { return; @@ -300,6 +301,7 @@ export class WebGPUBackend extends KernelBackend { this.releaseResource(d); this.tensorMap.delete(d); }); + this.uniformPendingDisposal.forEach( b => this.bufferManager.releaseBuffer(b)); this.stagingPendingDisposal.forEach( @@ -382,7 +384,6 @@ export class WebGPUBackend extends KernelBackend { private convertAndCacheOnCPU(dataId: DataId, data: BackendValues): BackendValues { const tensorData = this.tensorMap.get(dataId); - this.releaseResource(dataId); tensorData.values = data; return tensorData.values; } diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index e8c35d6ff6f..ed8149f409a 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -60,7 +60,7 @@ describeWebGPU('backend webgpu cpu forwarding turned on', () => { expect(endNumBytes - startNumBytes).toEqual(48); expect(endNumTensors - startNumTensors).toEqual(2); - expect(endNumBytesInGPU - startNumBytesInGPU).toEqual(40); + expect(endNumBytesInGPU - startNumBytesInGPU).toEqual(64); expectArraysClose(dData, new Float32Array([9, 12, 15, 19, 26, 33])); }); From f62dd9d9996c1940c5995be9f444c74a129a280e Mon Sep 17 00:00:00 2001 From: Matthew Soulanille Date: Wed, 10 May 2023 16:54:57 -0700 Subject: [PATCH 13/32] Disable e2e WebGPU golden model tests (#7677) WebGPU golden model tests are freezing on BrowserStack for unknown reasons (they work locally). Disable them for now to unblock presubmits. 
--- e2e/integration_tests/graph_model_golden_tests.ts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/e2e/integration_tests/graph_model_golden_tests.ts b/e2e/integration_tests/graph_model_golden_tests.ts index e467e5760cf..5d091abe28d 100644 --- a/e2e/integration_tests/graph_model_golden_tests.ts +++ b/e2e/integration_tests/graph_model_golden_tests.ts @@ -20,7 +20,7 @@ import '@tensorflow/tfjs-backend-webgl'; import * as tfconverter from '@tensorflow/tfjs-converter'; import * as tfc from '@tensorflow/tfjs-core'; // tslint:disable-next-line: no-imports-from-dist -import {ALL_ENVS, describeWithFlags} from '@tensorflow/tfjs-core/dist/jasmine_util'; +import {Constraints, describeWithFlags} from '@tensorflow/tfjs-core/dist/jasmine_util'; import {GOLDEN, KARMA_SERVER} from './constants'; import * as GOLDEN_MODEL_DATA_FILENAMES from './graph_model_golden_data/filenames.json'; @@ -30,7 +30,14 @@ import {GraphModeGoldenData, TensorDetail} from './types'; const DATA_URL = 'graph_model_golden_data'; const INTERMEDIATE_NODE_TESTS_NUM = 5; -describeWithFlags(`${GOLDEN} graph_model_golden`, ALL_ENVS, (env) => { +// WebGPU freezes when running mobilenet on BrowserStack, so disable it for +// automated tests until it's working. +// TODO(mattSoulanille); Enable WebGPU golden file tests. +const NO_WEBGPU: Constraints = { + predicate: env => env.backendName !== 'webgpu' +} + +describeWithFlags(`${GOLDEN} graph_model_golden`, NO_WEBGPU, (env) => { let originalTimeout: number; beforeAll(async () => { @@ -40,7 +47,7 @@ describeWithFlags(`${GOLDEN} graph_model_golden`, ALL_ENVS, (env) => { // https://jasmine.github.io/2.0/introduction.html#section-42 originalTimeout = jasmine.DEFAULT_TIMEOUT_INTERVAL; jasmine.DEFAULT_TIMEOUT_INTERVAL = 1000000; - await tfc.setBackend(env.name); + await tfc.setBackend(env.backendName); }); afterAll(() => jasmine.DEFAULT_TIMEOUT_INTERVAL = originalTimeout); From 08b81e89237e19418aac10939e8c57c81ae59a90 Mon Sep 17 00:00:00 2001 From: chunnienc <121328115+chunnienc@users.noreply.github.com> Date: Wed, 10 May 2023 18:43:00 -0700 Subject: [PATCH 14/32] [wasm] Add Erf kernel (#7669) * add erf kernel to wasm * address comments * fix build * Update implementation * Update license header --------- Co-authored-by: Alvin Sun --- tfjs-backend-wasm/src/cc/BUILD.bazel | 10 ++++ tfjs-backend-wasm/src/cc/kernels/Erf.cc | 53 +++++++++++++++++++ tfjs-backend-wasm/src/kernels/Erf.ts | 22 ++++++++ tfjs-backend-wasm/src/register_all_kernels.ts | 2 + tfjs-backend-wasm/src/setup_test.ts | 1 + 5 files changed, 88 insertions(+) create mode 100644 tfjs-backend-wasm/src/cc/kernels/Erf.cc create mode 100644 tfjs-backend-wasm/src/kernels/Erf.ts diff --git a/tfjs-backend-wasm/src/cc/BUILD.bazel b/tfjs-backend-wasm/src/cc/BUILD.bazel index 347a3934fa4..65f5836d083 100644 --- a/tfjs-backend-wasm/src/cc/BUILD.bazel +++ b/tfjs-backend-wasm/src/cc/BUILD.bazel @@ -380,6 +380,7 @@ tfjs_cc_library( ":Dilation2DBackpropInput", ":Elu", ":Equal", + ":Erf", ":Exp", ":FlipLeftRight", ":FloorDiv", @@ -913,6 +914,15 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "Erf", + srcs = ["kernels/Erf.cc"], + deps = [ + ":unary", + ":util", + ], +) + tfjs_cc_library( name = "Exp", srcs = ["kernels/Exp.cc"], diff --git a/tfjs-backend-wasm/src/cc/kernels/Erf.cc b/tfjs-backend-wasm/src/cc/kernels/Erf.cc new file mode 100644 index 00000000000..70fe654b08f --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/Erf.cc @@ -0,0 +1,53 @@ +/* Copyright 2023 Google LLC. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include + +#include "tfjs-backend-wasm/src/cc/unary.h" +#include "tfjs-backend-wasm/src/cc/util.h" + +namespace tfjs { +namespace wasm { + +namespace { +template +inline T ErfImpl(T n) { + return static_cast(std::erff(static_cast(n))); +} +} // namespace + +extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif + +void Erf(const int x_id, const DType dtype, const int out_id) { + switch (dtype) { + case DType::float32: + unary_f32(x_id, out_id, ErfImpl); + break; + default: + util::warn("Erf for tensor id %d failed. Unsupported dtype %d", x_id, + dtype); + } +} + +} // extern "C" +} // namespace wasm +} // namespace tfjs diff --git a/tfjs-backend-wasm/src/kernels/Erf.ts b/tfjs-backend-wasm/src/kernels/Erf.ts new file mode 100644 index 00000000000..af39a05a4a9 --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/Erf.ts @@ -0,0 +1,22 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ + +import {Erf, KernelConfig} from '@tensorflow/tfjs-core'; + +import {createUnaryKernelConfig} from './unary_kernel'; + +export const erfConfig: KernelConfig = createUnaryKernelConfig(Erf); diff --git a/tfjs-backend-wasm/src/register_all_kernels.ts b/tfjs-backend-wasm/src/register_all_kernels.ts index 303f315d182..f2cfc346771 100644 --- a/tfjs-backend-wasm/src/register_all_kernels.ts +++ b/tfjs-backend-wasm/src/register_all_kernels.ts @@ -66,6 +66,7 @@ import {dilation2DBackpropInputConfig} from './kernels/Dilation2DBackpropInput'; import {eluConfig} from './kernels/Elu'; import {eluGradConfig} from './kernels/EluGrad'; import {equalConfig} from './kernels/Equal'; +import {erfConfig} from './kernels/Erf'; import {expConfig} from './kernels/Exp'; import {expandDimsConfig} from './kernels/ExpandDims'; import {expm1Config} from './kernels/Expm1'; @@ -221,6 +222,7 @@ const kernelConfigs: KernelConfig[] = [ eluConfig, eluGradConfig, equalConfig, + erfConfig, expConfig, expandDimsConfig, expm1Config, diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 4fdcac5decd..42175b7b643 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -297,6 +297,7 @@ const TEST_FILTERS: TestFilter[] = [ 'string tensor' // String tensors not yet implemented. ] }, + {startsWith: 'erf'}, {startsWith: 'sin '}, {startsWith: 'sinh '}, { From 287dc7f84c9460352f9d82cda5ac5926c4f30192 Mon Sep 17 00:00:00 2001 From: chunnienc <121328115+chunnienc@users.noreply.github.com> Date: Thu, 11 May 2023 10:45:28 -0700 Subject: [PATCH 15/32] [wasm] Add AvgPoolGrad and MaxPoolGrad kernel (#7672) * wip * Add AvgPool3D kernel * Add MaxPool3D kernel * fix * Fix template types order * Update param channel_size * Update headers and build deps remove * Add AvgPool3DGrad * Add MaxPool3DGrad * Fix wasm func signature * Add AvgPoolGrad * Add MaxPoolGrad * Fix --- tfjs-backend-wasm/src/cc/BUILD.bazel | 20 ++++ .../src/cc/kernels/AvgPoolGrad.cc | 79 +++++++++++++++ .../src/cc/kernels/MaxPoolGrad.cc | 98 +++++++++++++++++++ tfjs-backend-wasm/src/kernels/AvgPoolGrad.ts | 97 ++++++++++++++++++ tfjs-backend-wasm/src/kernels/MaxPoolGrad.ts | 95 ++++++++++++++++++ tfjs-backend-wasm/src/register_all_kernels.ts | 4 + tfjs-backend-wasm/src/setup_test.ts | 8 +- 7 files changed, 394 insertions(+), 7 deletions(-) create mode 100644 tfjs-backend-wasm/src/cc/kernels/AvgPoolGrad.cc create mode 100644 tfjs-backend-wasm/src/cc/kernels/MaxPoolGrad.cc create mode 100644 tfjs-backend-wasm/src/kernels/AvgPoolGrad.ts create mode 100644 tfjs-backend-wasm/src/kernels/MaxPoolGrad.ts diff --git a/tfjs-backend-wasm/src/cc/BUILD.bazel b/tfjs-backend-wasm/src/cc/BUILD.bazel index 65f5836d083..5ea8f725748 100644 --- a/tfjs-backend-wasm/src/cc/BUILD.bazel +++ b/tfjs-backend-wasm/src/cc/BUILD.bazel @@ -357,6 +357,7 @@ tfjs_cc_library( ":AvgPool", ":AvgPool3D", ":AvgPool3DGrad", + ":AvgPoolGrad", ":BatchMatMul", ":Bincount", ":BitwiseAnd", @@ -405,6 +406,7 @@ tfjs_cc_library( ":MaxPool", ":MaxPool3D", ":MaxPool3DGrad", + ":MaxPoolGrad", ":Maximum", ":Min", ":Minimum", @@ -603,6 +605,15 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "AvgPoolGrad", + srcs = ["kernels/AvgPoolGrad.cc"], + deps = [ + ":backend", + ":pool3d_impl", + ], +) + tfjs_cc_library( name = "AvgPool3D", srcs = ["kernels/AvgPool3D.cc"], @@ -1200,6 +1211,15 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "MaxPoolGrad", + srcs = 
["kernels/MaxPoolGrad.cc"], + deps = [ + ":backend", + ":pool3d_impl", + ], +) + tfjs_unit_test( name = "MaxPool_test", srcs = ["kernels/MaxPool_test.cc"], diff --git a/tfjs-backend-wasm/src/cc/kernels/AvgPoolGrad.cc b/tfjs-backend-wasm/src/cc/kernels/AvgPoolGrad.cc new file mode 100644 index 00000000000..5f50361d81d --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/AvgPoolGrad.cc @@ -0,0 +1,79 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include + +#include "tfjs-backend-wasm/src/cc/backend.h" +#include "tfjs-backend-wasm/src/cc/pool3d_impl.h" + +namespace tfjs::wasm { + +// We use C-style API to interface with Javascript. +extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif + +// REQUIRES: +// - Tensor `dx` and `dy` must have dtype float32 (checked in tfjs-core) +void AvgPoolGrad(int dy_id, int dx_id, int batch_size, int channel_size, + int in_height, int in_width, int out_height, int out_width, + int stride_height, int stride_width, int dilation_height, + int dilation_width, int effective_filter_height, + int effective_filter_width, int pad_top, int pad_left, + int filter_height, int filter_width) { + const TensorInfo& dy_info = backend::get_tensor_info(dy_id); + TensorInfo& dx_info = backend::get_tensor_info_out(dx_id); + + NDHWCPool3DGradImpl( + dy_info.f32(), dx_info.f32_write(), + NDHWCPool3DInfo{ + .batch_size = batch_size, + .channel_size = channel_size, + .in_depth = 1, + .in_height = in_height, + .in_width = in_width, + .out_depth = 1, + .out_height = out_height, + .out_width = out_width, + .stride_depth = 1, + .stride_height = stride_height, + .stride_width = stride_width, + .dilation_depth = 1, + .dilation_height = dilation_height, + .dilation_width = dilation_width, + .effective_filter_depth = 1, + .effective_filter_height = effective_filter_height, + .effective_filter_width = effective_filter_width, + .pad_front = 0, + .pad_top = pad_top, + .pad_left = pad_left, + }, + /*pixel_mask=*/ + [avg_multiplier = 1.0f / (static_cast(filter_height) * + static_cast(filter_width))](int, int) { + return avg_multiplier; + }); +} + +} // extern "C" +} // namespace tfjs::wasm diff --git a/tfjs-backend-wasm/src/cc/kernels/MaxPoolGrad.cc b/tfjs-backend-wasm/src/cc/kernels/MaxPoolGrad.cc new file mode 100644 index 00000000000..bd56975b785 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/MaxPoolGrad.cc @@ -0,0 +1,98 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include +#include + +#include "tfjs-backend-wasm/src/cc/backend.h" +#include "tfjs-backend-wasm/src/cc/pool3d_impl.h" + +namespace tfjs::wasm { + +// We use C-style API to interface with Javascript. +extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif + +// REQUIRES: +// - Tensor `x`, `dx` and `dy` must have dtype float32 (checked in tfjs-core) +void MaxPoolGrad(int x_id, int dy_id, int dx_id, int batch_size, + int channel_size, int in_height, int in_width, int out_height, + int out_width, int stride_height, int stride_width, + int dilation_height, int dilation_width, + int effective_filter_height, int effective_filter_width, + int pad_top, int pad_left) { + const TensorInfo& x_info = backend::get_tensor_info(x_id); + const TensorInfo& dy_info = backend::get_tensor_info(dy_id); + TensorInfo& dx_info = backend::get_tensor_info_out(dx_id); + NDHWCPool3DInfo pool_info{ + .batch_size = batch_size, + .channel_size = channel_size, + .in_depth = 1, + .in_height = in_height, + .in_width = in_width, + .out_depth = 1, + .out_height = out_height, + .out_width = out_width, + .stride_depth = 1, + .stride_height = stride_height, + .stride_width = stride_width, + .dilation_depth = 1, + .dilation_height = dilation_height, + .dilation_width = dilation_width, + .effective_filter_depth = 1, + .effective_filter_height = effective_filter_height, + .effective_filter_width = effective_filter_width, + .pad_front = 0, + .pad_top = pad_top, + .pad_left = pad_left, + }; + + int* max_positions = new int[pool_info.out_size()]; + NDHWCPool3DImpl( + x_info.f32(), max_positions, pool_info, + /*filter_init=*/ + []() -> std::pair { + return {std::numeric_limits::min(), 0}; + }, + /*filter_apply=*/ + [](std::pair& data, int x_offset, const float& x_val) { + if (x_val >= data.first) { + data = {x_val, x_offset}; + } + }, + /*filter_aggregate=*/ + [](const std::pair& data) { return data.second; }); + + NDHWCPool3DGradImpl( + dy_info.f32(), dx_info.f32_write(), pool_info, + /*pixel_mask=*/ + [&max_positions](int dy_offset, int dx_offset) { + return static_cast(dx_offset == max_positions[dy_offset]); + }); + + delete[] max_positions; +} + +} // extern "C" +} // namespace tfjs::wasm diff --git a/tfjs-backend-wasm/src/kernels/AvgPoolGrad.ts b/tfjs-backend-wasm/src/kernels/AvgPoolGrad.ts new file mode 100644 index 00000000000..e74d094c711 --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/AvgPoolGrad.ts @@ -0,0 +1,97 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {AvgPoolGrad, AvgPoolGradAttrs, AvgPoolGradInputs, backend_util, KernelConfig, KernelFunc, TensorInfo} from '@tensorflow/tfjs-core'; + +import {BackendWasm} from '../backend_wasm'; + +let wasmAvgPoolGrad: ( + dyId: number, dxId: number, batchSize: number, channelSize: number, + inHeight: number, inWidth: number, outHeight: number, outWidth: number, + strideHeight: number, strideWidth: number, dilationHeight: number, + dilationWidth: number, effectiveFilterHeight: number, + effectiveFilterWidth: number, padTop: number, padLeft: number, + filterHeight: number, filterWidth: number) => void; + +function setup(backend: BackendWasm) { + wasmAvgPoolGrad = backend.wasm.cwrap('AvgPoolGrad', null, [ + 'number', // dyId + 'number', // dxId + 'number', // batchSize + 'number', // channelSize + 'number', // inHeight + 'number', // inWidth + 'number', // outHeight + 'number', // outWidth + 'number', // strideHeight + 'number', // strideWidth + 'number', // dilationHeight + 'number', // dilationWidth + 'number', // effectiveFilterHeight + 'number', // effectiveFilterWidth + 'number', // padTop + 'number', // padLeft + 'number', // filterHeight + 'number', // filterWidth + ]); +} + +export function avgPoolGrad(args: { + inputs: AvgPoolGradInputs, + attrs: AvgPoolGradAttrs, + backend: BackendWasm, +}): TensorInfo { + const {inputs, backend, attrs} = args; + const {dy, input} = inputs; + const {filterSize, strides, pad} = attrs; + + const convInfo = backend_util.computePool2DInfo( + input.shape as [number, number, number, number], filterSize, strides, + /*dilations=*/1, pad); + const dx = backend.makeOutput(input.shape, input.dtype); + + wasmAvgPoolGrad( + backend.dataIdMap.get(dy.dataId).id, + backend.dataIdMap.get(dx.dataId).id, + convInfo.batchSize, + // Since Pool ops (AvgPool and MaxPool) support 2D filter only, in + // channels should always equal to out channels. + /*channelSize=*/convInfo.inChannels, + convInfo.inHeight, + convInfo.inWidth, + convInfo.outHeight, + convInfo.outWidth, + convInfo.strideHeight, + convInfo.strideWidth, + convInfo.dilationHeight, + convInfo.dilationWidth, + convInfo.effectiveFilterHeight, + convInfo.effectiveFilterWidth, + convInfo.padInfo.top, + convInfo.padInfo.left, + convInfo.filterHeight, + convInfo.filterWidth, + ); + return dx; +} + +export const avgPoolGradConfig: KernelConfig = { + kernelName: AvgPoolGrad, + backendName: 'wasm', + setupFunc: setup, + kernelFunc: avgPoolGrad as unknown as KernelFunc +}; diff --git a/tfjs-backend-wasm/src/kernels/MaxPoolGrad.ts b/tfjs-backend-wasm/src/kernels/MaxPoolGrad.ts new file mode 100644 index 00000000000..792202c406f --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/MaxPoolGrad.ts @@ -0,0 +1,95 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ + +import {backend_util, KernelConfig, KernelFunc, MaxPoolGrad, MaxPoolGradAttrs, MaxPoolGradInputs, TensorInfo} from '@tensorflow/tfjs-core'; + +import {BackendWasm} from '../backend_wasm'; + +let wasmMaxPoolGrad: ( + xId: number, dyId: number, dxId: number, batchSize: number, + channelSize: number, inHeight: number, inWidth: number, outHeight: number, + outWidth: number, strideHeight: number, strideWidth: number, + dilationHeight: number, dilationWidth: number, + effectiveFilterHeight: number, effectiveFilterWidth: number, padTop: number, + padLeft: number) => void; + +function setup(backend: BackendWasm) { + wasmMaxPoolGrad = backend.wasm.cwrap('MaxPoolGrad', null, [ + 'number', // xId + 'number', // dyId + 'number', // dxId + 'number', // batchSize + 'number', // channelSize + 'number', // inHeight + 'number', // inWidth + 'number', // outHeight + 'number', // outWidth + 'number', // strideHeight + 'number', // strideWidth + 'number', // dilationHeight + 'number', // dilationWidth + 'number', // effectiveFilterHeight + 'number', // effectiveFilterWidth + 'number', // padTop + 'number', // padLeft + ]); +} + +export function maxPoolGrad(args: { + inputs: MaxPoolGradInputs, + attrs: MaxPoolGradAttrs, + backend: BackendWasm, +}): TensorInfo { + const {inputs, backend, attrs} = args; + const {dy, input} = inputs; + const {filterSize, strides, pad, dimRoundingMode} = attrs; + + const convInfo = backend_util.computePool2DInfo( + input.shape as [number, number, number, number], filterSize, strides, + /*dilations=*/1, pad, dimRoundingMode); + const dx = backend.makeOutput(input.shape, input.dtype); + + wasmMaxPoolGrad( + backend.dataIdMap.get(input.dataId).id, + backend.dataIdMap.get(dy.dataId).id, + backend.dataIdMap.get(dx.dataId).id, + convInfo.batchSize, + // Since Pool ops (MaxPool and MaxPool) support 2D filter only, in + // channels should always equal to out channels. 
+ /*channelSize=*/convInfo.inChannels, + convInfo.inHeight, + convInfo.inWidth, + convInfo.outHeight, + convInfo.outWidth, + convInfo.strideHeight, + convInfo.strideWidth, + convInfo.dilationHeight, + convInfo.dilationWidth, + convInfo.effectiveFilterHeight, + convInfo.effectiveFilterWidth, + convInfo.padInfo.top, + convInfo.padInfo.left, + ); + return dx; +} + +export const maxPoolGradConfig: KernelConfig = { + kernelName: MaxPoolGrad, + backendName: 'wasm', + setupFunc: setup, + kernelFunc: maxPoolGrad as unknown as KernelFunc +}; diff --git a/tfjs-backend-wasm/src/register_all_kernels.ts b/tfjs-backend-wasm/src/register_all_kernels.ts index f2cfc346771..f517a610006 100644 --- a/tfjs-backend-wasm/src/register_all_kernels.ts +++ b/tfjs-backend-wasm/src/register_all_kernels.ts @@ -37,6 +37,7 @@ import {atanhConfig} from './kernels/Atanh'; import {avgPoolConfig} from './kernels/AvgPool'; import {avgPool3DConfig} from './kernels/AvgPool3D'; import {avgPool3DGradConfig} from './kernels/AvgPool3DGrad'; +import {avgPoolGradConfig} from './kernels/AvgPoolGrad'; import {batchMatMulConfig} from './kernels/BatchMatMul'; import {batchToSpaceNDConfig} from './kernels/BatchToSpaceND'; import {bincountConfig} from './kernels/Bincount'; @@ -102,6 +103,7 @@ import {maximumConfig} from './kernels/Maximum'; import {maxPoolConfig} from './kernels/MaxPool'; import {maxPool3DConfig} from './kernels/MaxPool3D'; import {maxPool3DGradConfig} from './kernels/MaxPool3DGrad'; +import {maxPoolGradConfig} from './kernels/MaxPoolGrad'; import {meanConfig} from './kernels/Mean'; import {minConfig} from './kernels/Min'; import {minimumConfig} from './kernels/Minimum'; @@ -191,6 +193,7 @@ const kernelConfigs: KernelConfig[] = [ atan2Config, atanhConfig, avgPoolConfig, + avgPoolGradConfig, avgPool3DConfig, avgPool3DGradConfig, batchMatMulConfig, @@ -258,6 +261,7 @@ const kernelConfigs: KernelConfig[] = [ maxPoolConfig, maxPool3DConfig, maxPool3DGradConfig, + maxPoolGradConfig, meanConfig, minConfig, minimumConfig, diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 42175b7b643..461b690b2c9 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -63,12 +63,7 @@ const TEST_FILTERS: TestFilter[] = [ ] }, {include: 'depthToSpace'}, - { - include: 'avgPool ', - excludes: [ - 'gradient', // Not yet implemented. - ] - }, + {include: 'avgPool '}, { include: 'relu', excludes: [ @@ -83,7 +78,6 @@ const TEST_FILTERS: TestFilter[] = [ { include: 'maxPool', excludes: [ - 'maxPoolBackprop', // Not yet implemented. 'ignores NaNs', // Actual != expected. 'maxPoolWithArgmax' // Not yet implemented. ] From 03b436933e6341e3d7b6b7878affcbcaf4c799ae Mon Sep 17 00:00:00 2001 From: Matthew Soulanille Date: Thu, 11 May 2023 12:50:48 -0700 Subject: [PATCH 16/32] Fix golden integration tests not awaiting results (#7676) Golden e2e model tests were not waiting for the asynchronous `tensor.data()` function to finish when checking results, making it hard to tell which test is failing. This PR fixes that. After this fix, golden model tests are consistently timing out on BrowserStack Mac Chrome. They work locally when tested on an M1 mac. Disable them on BrowserStack for now. 
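Simplified before/after of the fix (identifiers as in graph_model_golden_tests.ts below; model and golden setup omitted):

    // Before: the async golden comparison ran inside tfc.tidy() and its
    // Promise was dropped, so a failing check only showed up as a timeout.
    tfc.tidy(() => {
      const outputs = model.predict(createGoldenInputTensors(modelGolden));
      expectTensorsToEqualGoldens(outputs, modelGolden.outputDetails);
    });

    // After: await the comparison and dispose outputs explicitly, since
    // tfc.tidy() does not support awaiting async work such as tensor.data().
    const outputs = model.predict(createGoldenInputTensors(modelGolden));
    await expectTensorsToEqualGoldens(outputs, modelGolden.outputDetails);
    tfc.dispose(outputs);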
--- .../graph_model_golden_tests.ts | 45 ++++++++----------- e2e/scripts/run-browserstack-tests.sh | 6 ++- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/e2e/integration_tests/graph_model_golden_tests.ts b/e2e/integration_tests/graph_model_golden_tests.ts index 5d091abe28d..70022122bd4 100644 --- a/e2e/integration_tests/graph_model_golden_tests.ts +++ b/e2e/integration_tests/graph_model_golden_tests.ts @@ -20,7 +20,7 @@ import '@tensorflow/tfjs-backend-webgl'; import * as tfconverter from '@tensorflow/tfjs-converter'; import * as tfc from '@tensorflow/tfjs-core'; // tslint:disable-next-line: no-imports-from-dist -import {Constraints, describeWithFlags} from '@tensorflow/tfjs-core/dist/jasmine_util'; +import {ALL_ENVS, describeWithFlags} from '@tensorflow/tfjs-core/dist/jasmine_util'; import {GOLDEN, KARMA_SERVER} from './constants'; import * as GOLDEN_MODEL_DATA_FILENAMES from './graph_model_golden_data/filenames.json'; @@ -30,14 +30,7 @@ import {GraphModeGoldenData, TensorDetail} from './types'; const DATA_URL = 'graph_model_golden_data'; const INTERMEDIATE_NODE_TESTS_NUM = 5; -// WebGPU freezes when running mobilenet on BrowserStack, so disable it for -// automated tests until it's working. -// TODO(mattSoulanille); Enable WebGPU golden file tests. -const NO_WEBGPU: Constraints = { - predicate: env => env.backendName !== 'webgpu' -} - -describeWithFlags(`${GOLDEN} graph_model_golden`, NO_WEBGPU, (env) => { +describeWithFlags(`${GOLDEN} graph_model_golden`, ALL_ENVS, (env) => { let originalTimeout: number; beforeAll(async () => { @@ -56,18 +49,16 @@ describeWithFlags(`${GOLDEN} graph_model_golden`, NO_WEBGPU, (env) => { describe(goldenFilename, () => { it('model.predict(...)', async () => { const [modelGolden, model] = await loadModelGolden(goldenFilename); - tfc.tidy(() => { - const outputs = model.predict(createGoldenInputTensors(modelGolden)); - expectTensorsToEqualGoldens(outputs, modelGolden.outputDetails); - }); + const outputs = model.predict(createGoldenInputTensors(modelGolden)); + await expectTensorsToEqualGoldens(outputs, modelGolden.outputDetails); + tfc.dispose(outputs); }); it('model.execute(...) 
with default outputs', async () => { const [modelGolden, model] = await loadModelGolden(goldenFilename); - tfc.tidy(() => { - const outputs = model.execute(createGoldenInputTensors(modelGolden)); - expectTensorsToEqualGoldens(outputs, modelGolden.outputDetails); - }); + const outputs = model.execute(createGoldenInputTensors(modelGolden)); + await expectTensorsToEqualGoldens(outputs, modelGolden.outputDetails); + tfc.dispose(outputs); }); for (let batchId = 1; batchId <= INTERMEDIATE_NODE_TESTS_NUM; ++batchId) { @@ -97,13 +88,13 @@ describeWithFlags(`${GOLDEN} graph_model_golden`, NO_WEBGPU, (env) => { return details; }); - tfc.tidy(() => { - const outputs = model.execute( - createGoldenInputTensors(modelGolden), - targetNodeNames) as tfc.Tensor[]; - expect(outputs.length).toEqual(goldens.length); - expectTensorsToEqualGoldens(outputs, goldens); - }); + const outputs = model.execute( + createGoldenInputTensors(modelGolden), + targetNodeNames) as tfc.Tensor[]; + + expect(outputs.length).toEqual(goldens.length); + await expectTensorsToEqualGoldens(outputs, goldens); + tfc.dispose(outputs); }); } }); @@ -141,20 +132,20 @@ async function expectTensorsToEqualGoldens( expect(tensors).toEqual(jasmine.anything()); expect(goldens).toEqual(jasmine.anything()); if (tensors instanceof tfc.Tensor) { - expectTensorToEqualGolden(tensors, goldens as TensorDetail); + await expectTensorToEqualGolden(tensors, goldens as TensorDetail); } else if (Array.isArray(tensors)) { expect(Array.isArray(goldens)).toEqual(true); const details = goldens as TensorDetail[]; expect(tensors.length).toEqual(details.length); for (let i = 0; i < tensors.length; ++i) { - expectTensorToEqualGolden(tensors[i], details[i]); + await expectTensorToEqualGolden(tensors[i], details[i]); } } else { const detailMap = goldens as Record; expect(new Set(Object.keys(detailMap))) .toEqual(new Set(Object.keys(tensors))); for (const [name, detail] of Object.entries(detailMap)) { - expectTensorToEqualGolden(tensors[name], detail); + await expectTensorToEqualGolden(tensors[name], detail); } } } diff --git a/e2e/scripts/run-browserstack-tests.sh b/e2e/scripts/run-browserstack-tests.sh index 84c774223ea..44b31c079f1 100755 --- a/e2e/scripts/run-browserstack-tests.sh +++ b/e2e/scripts/run-browserstack-tests.sh @@ -23,8 +23,10 @@ set -e TAGS="#SMOKE,#REGRESSION" TAGS_WITH_GOLDEN="$TAGS,#GOLDEN" -# Test macOS with smoke/regression/golden tests. -COMMANDS+=("yarn run-browserstack --browsers=bs_chrome_mac --tags '$TAGS_WITH_GOLDEN'") +# Test macOS with smoke/regression tests. +# Skip golden tests because they time out on browserstack (they work locally). +# TODO(mattSoulanille): Make golden tests work on BrowserStack Mac. +COMMANDS+=("yarn run-browserstack --browsers=bs_chrome_mac --tags '$TAGS'") # Test windows 10 with smoke/regression/golden tests. 
COMMANDS+=("yarn run-browserstack --browsers=win_10_chrome --tags '$TAGS_WITH_GOLDEN'") From 6d7c132c1a8d7393765787d62fbbfc518189f022 Mon Sep 17 00:00:00 2001 From: chunnienc <121328115+chunnienc@users.noreply.github.com> Date: Thu, 11 May 2023 13:21:11 -0700 Subject: [PATCH 17/32] [wasm] Add Mod kernel (#7670) * add mod kernel to wasm backend FEATURE * Fix implementation * Fix implementation * Update license header --------- Co-authored-by: Paul Vanhaesebrouck --- tfjs-backend-wasm/src/cc/BUILD.bazel | 10 +++ tfjs-backend-wasm/src/cc/kernels/Mod.cc | 76 +++++++++++++++++++ tfjs-backend-wasm/src/kernels/Mod.ts | 23 ++++++ tfjs-backend-wasm/src/register_all_kernels.ts | 2 + tfjs-backend-wasm/src/setup_test.ts | 1 + 5 files changed, 112 insertions(+) create mode 100644 tfjs-backend-wasm/src/cc/kernels/Mod.cc create mode 100644 tfjs-backend-wasm/src/kernels/Mod.ts diff --git a/tfjs-backend-wasm/src/cc/BUILD.bazel b/tfjs-backend-wasm/src/cc/BUILD.bazel index 5ea8f725748..7d89e9cd7c7 100644 --- a/tfjs-backend-wasm/src/cc/BUILD.bazel +++ b/tfjs-backend-wasm/src/cc/BUILD.bazel @@ -411,6 +411,7 @@ tfjs_cc_library( ":Min", ":Minimum", ":MirrorPad", + ":Mod", ":Multinomial", ":Multiply", ":Neg", @@ -1275,6 +1276,15 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "Mod", + srcs = ["kernels/Mod.cc"], + deps = [ + ":binary", + ":util", + ], +) + tfjs_cc_library( name = "Multiply", srcs = ["kernels/Multiply.cc"], diff --git a/tfjs-backend-wasm/src/cc/kernels/Mod.cc b/tfjs-backend-wasm/src/cc/kernels/Mod.cc new file mode 100644 index 00000000000..a73dd2288fa --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/Mod.cc @@ -0,0 +1,76 @@ +/* Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include +#include + +#include "tfjs-backend-wasm/src/cc/binary.h" +#include "tfjs-backend-wasm/src/cc/util.h" + +namespace tfjs { +namespace wasm { + +namespace { + +template +inline T ModInt(T a, T b) { + T rem = a % b; + if ((a < 0 && b < 0) || (a >= 0 && b >= 0)) { + return rem; + } + return (rem + b) % b; +} + +template +inline T ModFloat(T a, T b) { + T rem = std::fmod(a, b); + if ((a < 0 && b < 0) || (a >= 0 && b >= 0)) { + return rem; + } + return std::fmod(rem + b, b); +} + +} // namespace + +// We use C-style API to interface with Javascript. +extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif +void Mod(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, + const size_t b_id, const size_t* b_shape_ptr, const size_t b_shape_len, + const DType dtype, const size_t out_id) { + switch (dtype) { + case DType::float32: + binary_f32(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, + out_id, ModFloat); + break; + case DType::int32: + binary_i32(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, + out_id, ModInt); + break; + default: + util::warn("Mod for tensor ids %d and %d failed. 
Unsupported dtype %d", + a_id, b_id, dtype); + } +} + +} // extern "C" +} // namespace wasm +} // namespace tfjs diff --git a/tfjs-backend-wasm/src/kernels/Mod.ts b/tfjs-backend-wasm/src/kernels/Mod.ts new file mode 100644 index 00000000000..9ac377a4aa4 --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/Mod.ts @@ -0,0 +1,23 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {KernelConfig, Mod} from '@tensorflow/tfjs-core'; + +import {createBinaryKernelConfig} from './binary_kernel'; + +export const modConfig: KernelConfig = + createBinaryKernelConfig(Mod, /*supportsFullBroadcast=*/true); diff --git a/tfjs-backend-wasm/src/register_all_kernels.ts b/tfjs-backend-wasm/src/register_all_kernels.ts index f517a610006..543cd8e753e 100644 --- a/tfjs-backend-wasm/src/register_all_kernels.ts +++ b/tfjs-backend-wasm/src/register_all_kernels.ts @@ -109,6 +109,7 @@ import {minConfig} from './kernels/Min'; import {minimumConfig} from './kernels/Minimum'; import {mirrorPadConfig} from './kernels/MirrorPad'; import {multinomialConfig} from './kernels/Multinomial'; +import {modConfig} from './kernels/Mod'; import {multiplyConfig} from './kernels/Multiply'; import {negConfig} from './kernels/Neg'; import {nonMaxSuppressionV3Config} from './kernels/NonMaxSuppressionV3'; @@ -267,6 +268,7 @@ const kernelConfigs: KernelConfig[] = [ minimumConfig, mirrorPadConfig, multinomialConfig, + modConfig, multiplyConfig, negConfig, nonMaxSuppressionV3Config, diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 461b690b2c9..4884eb5547f 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -409,6 +409,7 @@ const TEST_FILTERS: TestFilter[] = [ {include: 'multinomial'}, {include: 'unique'}, {include: 'conv3d'}, + {include: 'mod '}, ]; const customInclude = (testName: string) => { From fd48f88aebe15ba42a37369699cc61ad6b903c5c Mon Sep 17 00:00:00 2001 From: chunnienc <121328115+chunnienc@users.noreply.github.com> Date: Thu, 11 May 2023 13:32:01 -0700 Subject: [PATCH 18/32] Add usernames to release note script (#7680) --- scripts/release_notes/util.ts | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/scripts/release_notes/util.ts b/scripts/release_notes/util.ts index 96c5d688aaf..3d40c91056b 100644 --- a/scripts/release_notes/util.ts +++ b/scripts/release_notes/util.ts @@ -19,7 +19,7 @@ import * as shell from 'shelljs'; import * as readline from 'readline'; -const GOOGLERS_WITH_GMAIL = [ +const GOOGLERS_WITH_GMAIL = new Set([ 'dsmilkov', 'kainino0x', 'davidsoergel', @@ -31,7 +31,14 @@ const GOOGLERS_WITH_GMAIL = [ 'lina128', 'mattsoulanille', 'jinjingforever', -]; + 'chunnienc', + 'Linchenn', + 'fengwuyao', +].map((s) => s.trim().toLowerCase())); + +function isGooglerUsername(username: string): boolean { + return 
GOOGLERS_WITH_GMAIL.has(username.trim().toLocaleLowerCase()); +} const rl = readline.createInterface({input: process.stdin, output: process.stdout}); @@ -124,7 +131,7 @@ const SECTION_TAGS: SectionTag[] = [ */ export async function getReleaseNotesDraft( octokit: OctokitGetCommit, repoCommits: RepoCommits[]): Promise { - const repoNotes = []; + const repoNotes: string[] = []; for (let i = 0; i < repoCommits.length; i++) { const repoCommit = repoCommits[i]; @@ -174,13 +181,13 @@ export async function getReleaseNotesDraft( const username = await getUsernameForCommit(commit.sha); const isExternalContributor = !commit.authorEmail.endsWith('@google.com') && - GOOGLERS_WITH_GMAIL.indexOf(username) === -1; + !isGooglerUsername(username); const pullRequestRegexp = /\(#([0-9]+)\)/; const pullRequestMatch = commit.subject.match(pullRequestRegexp); let subject = commit.subject; - let pullRequestNumber = null; + let pullRequestNumber: string|null = null; if (pullRequestMatch != null) { subject = subject.replace(pullRequestRegexp, '').trim(); pullRequestNumber = pullRequestMatch[1]; From cf72ac03cb160234c48374491d2ca76f5b348a6a Mon Sep 17 00:00:00 2001 From: chunnienc <121328115+chunnienc@users.noreply.github.com> Date: Thu, 11 May 2023 13:58:38 -0700 Subject: [PATCH 19/32] [wasm] Add MaxPoolWithArgmax kernel (#7673) * add implementation * Fix lint * Update with new func signature --- tfjs-backend-wasm/src/cc/BUILD.bazel | 11 ++ tfjs-backend-wasm/src/cc/kernels/AvgPool3D.cc | 11 +- tfjs-backend-wasm/src/cc/kernels/MaxPool3D.cc | 8 +- .../src/cc/kernels/MaxPool3DGrad.cc | 10 +- .../src/cc/kernels/MaxPoolGrad.cc | 10 +- .../src/cc/kernels/MaxPoolWithArgmax.cc | 127 ++++++++++++++++++ tfjs-backend-wasm/src/cc/pool3d_impl.h | 11 +- .../src/kernels/MaxPoolWithArgmax.ts | 111 +++++++++++++++ tfjs-backend-wasm/src/register_all_kernels.ts | 2 + tfjs-backend-wasm/src/setup_test.ts | 1 - 10 files changed, 279 insertions(+), 23 deletions(-) create mode 100644 tfjs-backend-wasm/src/cc/kernels/MaxPoolWithArgmax.cc create mode 100644 tfjs-backend-wasm/src/kernels/MaxPoolWithArgmax.ts diff --git a/tfjs-backend-wasm/src/cc/BUILD.bazel b/tfjs-backend-wasm/src/cc/BUILD.bazel index 7d89e9cd7c7..bba48d8d110 100644 --- a/tfjs-backend-wasm/src/cc/BUILD.bazel +++ b/tfjs-backend-wasm/src/cc/BUILD.bazel @@ -407,6 +407,7 @@ tfjs_cc_library( ":MaxPool3D", ":MaxPool3DGrad", ":MaxPoolGrad", + ":MaxPoolWithArgmax", ":Maximum", ":Min", ":Minimum", @@ -1248,6 +1249,16 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "MaxPoolWithArgmax", + srcs = ["kernels/MaxPoolWithArgmax.cc"], + deps = [ + ":backend", + ":pool3d_impl", + ":util", + ], +) + tfjs_cc_library( name = "Min", srcs = ["kernels/Min.cc"], diff --git a/tfjs-backend-wasm/src/cc/kernels/AvgPool3D.cc b/tfjs-backend-wasm/src/cc/kernels/AvgPool3D.cc index 368207e273f..07188471ab1 100644 --- a/tfjs-backend-wasm/src/cc/kernels/AvgPool3D.cc +++ b/tfjs-backend-wasm/src/cc/kernels/AvgPool3D.cc @@ -45,7 +45,7 @@ void AvgPool3D(int x_id, int out_id, int batch_size, int channel_size, const TensorInfo& x_info = backend::get_tensor_info(x_id); TensorInfo& out_info = backend::get_tensor_info_out(out_id); - NDHWCPool3DImpl(x_info.f32(), out_info.f32_write(), + NDHWCPool3DImpl(x_info.f32(), NDHWCPool3DInfo{ .batch_size = batch_size, .channel_size = channel_size, @@ -77,10 +77,11 @@ void AvgPool3D(int x_id, int out_id, int batch_size, int channel_size, data.first += val; ++data.second; }, - /*filter_aggregate=*/ - [](const std::pair& data) { - return data.first / - 
static_cast(std::max(data.second, 1)); + /*filter_assign=*/ + [buf = out_info.f32_write()]( + int offset, const std::pair& data) { + buf[offset] = data.first / + static_cast(std::max(data.second, 1)); }); } diff --git a/tfjs-backend-wasm/src/cc/kernels/MaxPool3D.cc b/tfjs-backend-wasm/src/cc/kernels/MaxPool3D.cc index 877bada6b16..0d05064180c 100644 --- a/tfjs-backend-wasm/src/cc/kernels/MaxPool3D.cc +++ b/tfjs-backend-wasm/src/cc/kernels/MaxPool3D.cc @@ -46,7 +46,7 @@ void MaxPool3D(int x_id, int out_id, int batch_size, int channel_size, TensorInfo& out_info = backend::get_tensor_info_out(out_id); NDHWCPool3DImpl( - x_info.f32(), out_info.f32_write(), + x_info.f32(), NDHWCPool3DInfo{ .batch_size = batch_size, .channel_size = channel_size, @@ -73,8 +73,10 @@ void MaxPool3D(int x_id, int out_id, int batch_size, int channel_size, []() -> float { return std::numeric_limits::min(); }, /*filter_apply=*/ [](float& data, int, const float& val) { data = std::max(data, val); }, - /*filter_aggregate=*/ - [](const float& data) { return data; }); + /*filter_assign=*/ + [buf = out_info.f32_write()](int offset, const float& data) { + buf[offset] = data; + }); } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/kernels/MaxPool3DGrad.cc b/tfjs-backend-wasm/src/cc/kernels/MaxPool3DGrad.cc index a99514cdc0d..e30578957cb 100644 --- a/tfjs-backend-wasm/src/cc/kernels/MaxPool3DGrad.cc +++ b/tfjs-backend-wasm/src/cc/kernels/MaxPool3DGrad.cc @@ -70,8 +70,8 @@ void MaxPool3DGrad(int x_id, int dy_id, int dx_id, int batch_size, }; int* max_positions = new int[pool3d_info.out_size()]; - NDHWCPool3DImpl( - x_info.f32(), max_positions, pool3d_info, + NDHWCPool3DImpl( + x_info.f32(), pool3d_info, /*filter_init=*/ []() -> std::pair { return {std::numeric_limits::min(), 0}; @@ -82,8 +82,10 @@ void MaxPool3DGrad(int x_id, int dy_id, int dx_id, int batch_size, data = {x_val, x_offset}; } }, - /*filter_aggregate=*/ - [](const std::pair& data) { return data.second; }); + /*filter_assign=*/ + [max_positions](int offset, const std::pair& data) { + max_positions[offset] = data.second; + }); NDHWCPool3DGradImpl( dy_info.f32(), dx_info.f32_write(), pool3d_info, diff --git a/tfjs-backend-wasm/src/cc/kernels/MaxPoolGrad.cc b/tfjs-backend-wasm/src/cc/kernels/MaxPoolGrad.cc index bd56975b785..992fc3eda96 100644 --- a/tfjs-backend-wasm/src/cc/kernels/MaxPoolGrad.cc +++ b/tfjs-backend-wasm/src/cc/kernels/MaxPoolGrad.cc @@ -69,8 +69,8 @@ void MaxPoolGrad(int x_id, int dy_id, int dx_id, int batch_size, }; int* max_positions = new int[pool_info.out_size()]; - NDHWCPool3DImpl( - x_info.f32(), max_positions, pool_info, + NDHWCPool3DImpl( + x_info.f32(), pool_info, /*filter_init=*/ []() -> std::pair { return {std::numeric_limits::min(), 0}; @@ -81,8 +81,10 @@ void MaxPoolGrad(int x_id, int dy_id, int dx_id, int batch_size, data = {x_val, x_offset}; } }, - /*filter_aggregate=*/ - [](const std::pair& data) { return data.second; }); + /*filter_assign=*/ + [max_positions](int offset, const std::pair& data) { + max_positions[offset] = data.second; + }); NDHWCPool3DGradImpl( dy_info.f32(), dx_info.f32_write(), pool_info, diff --git a/tfjs-backend-wasm/src/cc/kernels/MaxPoolWithArgmax.cc b/tfjs-backend-wasm/src/cc/kernels/MaxPoolWithArgmax.cc new file mode 100644 index 00000000000..8ff43092b2c --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/MaxPoolWithArgmax.cc @@ -0,0 +1,127 @@ +/* Copyright 2023 Google LLC. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include +#include + +#include "tfjs-backend-wasm/src/cc/backend.h" +#include "tfjs-backend-wasm/src/cc/pool3d_impl.h" +#include "tfjs-backend-wasm/src/cc/util.h" + +namespace tfjs::wasm { + +namespace { + +template +inline void MaxPoolWithArgmaxImpl(const T* x_buf, T* pooled_buf, + int32_t* indexes_buf, + bool include_batch_index, + const NDHWCPool3DInfo& pool_info) { + NDHWCPool3DImpl(x_buf, pool_info, + /*filter_init=*/ + []() -> std::pair { + return {std::numeric_limits::min(), 0}; + }, + /*filter_apply=*/ + [](std::pair& data, int x_offset, const T& x_val) { + if (x_val >= data.first) { + data = {x_val, x_offset}; + } + }, + /*filter_assign=*/ + [pooled_buf, indexes_buf, include_batch_index, + index_mod = pool_info.in_height * pool_info.in_width * + pool_info.channel_size]( + int offset, const std::pair& data) { + pooled_buf[offset] = data.first; + indexes_buf[offset] = include_batch_index + ? data.second + : data.second % index_mod; + }); +} + +} // namespace + +// We use C-style API to interface with Javascript. +extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif + +// REQUIRES: +// - Tensor `x` and `out` must have the same dtype. +// - Tensor `indexes` must have dtype `int32`. 
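+// Implementation note: the 2D MaxPoolWithArgmax case reuses the shared NDHWC
+// 3D pooling helper by fixing every depth-related field of NDHWCPool3DInfo to
+// 1 and pad_front to 0. When include_batch_index is false, the flattened
+// argmax offset is reduced modulo in_height * in_width * channel_size so that
+// indexes are relative to each batch element.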
+void MaxPoolWithArgmax(int x_id, int pooled_id, int indexes_id, DType dtype, + bool include_batch_index, int batch_size, + int channel_size, int in_height, int in_width, + int out_height, int out_width, int stride_height, + int stride_width, int dilation_height, + int dilation_width, int effective_filter_height, + int effective_filter_width, int pad_top, int pad_left) { + const TensorInfo& x_info = backend::get_tensor_info(x_id); + TensorInfo& pooled_info = backend::get_tensor_info_out(pooled_id); + TensorInfo& indexes_info = backend::get_tensor_info_out(indexes_id); + + NDHWCPool3DInfo pool_info{ + .batch_size = batch_size, + .channel_size = channel_size, + .in_depth = 1, + .in_height = in_height, + .in_width = in_width, + .out_depth = 1, + .out_height = out_height, + .out_width = out_width, + .stride_depth = 1, + .stride_height = stride_height, + .stride_width = stride_width, + .dilation_depth = 1, + .dilation_height = dilation_height, + .dilation_width = dilation_width, + .effective_filter_depth = 1, + .effective_filter_height = effective_filter_height, + .effective_filter_width = effective_filter_width, + .pad_front = 0, + .pad_top = pad_top, + .pad_left = pad_left, + }; + + switch (dtype) { + case DType::float32: + MaxPoolWithArgmaxImpl(x_info.f32(), pooled_info.f32_write(), + indexes_info.i32_write(), include_batch_index, + pool_info); + break; + case DType::int32: + MaxPoolWithArgmaxImpl(x_info.i32(), pooled_info.i32_write(), + indexes_info.i32_write(), include_batch_index, + pool_info); + break; + case DType::boolean: + MaxPoolWithArgmaxImpl(x_info.b(), pooled_info.b_write(), + indexes_info.i32_write(), include_batch_index, + pool_info); + break; + default: + util::warn("MaxPoolWithArgmax for tensor id failed. Unknown dtype %d", + x_id, dtype); + } +} + +} // extern "C" +} // namespace tfjs::wasm diff --git a/tfjs-backend-wasm/src/cc/pool3d_impl.h b/tfjs-backend-wasm/src/cc/pool3d_impl.h index 8e705f59bc9..6faa75961e4 100644 --- a/tfjs-backend-wasm/src/cc/pool3d_impl.h +++ b/tfjs-backend-wasm/src/cc/pool3d_impl.h @@ -75,11 +75,10 @@ struct NDHWCPool3DInfo { inline int int_size() const { return in_shape().size(); } inline int out_size() const { return out_shape().size(); } }; -template -inline void NDHWCPool3DImpl(const IN* x_buf, OUT* out_buf, - const NDHWCPool3DInfo& info, const FI& filter_init, - const FAP& filter_apply, - const FAG& filter_aggregate) { +template +inline void NDHWCPool3DImpl(const IN* x_buf, const NDHWCPool3DInfo& info, + const FI& filter_init, const FAP& filter_apply, + const FAG& filter_assign) { for (int batch = 0; batch < info.batch_size; ++batch) { for (int channel = 0; channel < info.channel_size; ++channel) { for (int y_depth = 0; y_depth < info.out_depth; ++y_depth) { @@ -118,7 +117,7 @@ inline void NDHWCPool3DImpl(const IN* x_buf, OUT* out_buf, } int out_offset = info.out_offset(batch, y_depth, y_row, y_col, channel); - out_buf[out_offset] = filter_aggregate(filter_data); + filter_assign(out_offset, filter_data); } } } diff --git a/tfjs-backend-wasm/src/kernels/MaxPoolWithArgmax.ts b/tfjs-backend-wasm/src/kernels/MaxPoolWithArgmax.ts new file mode 100644 index 00000000000..f2d5a33caff --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/MaxPoolWithArgmax.ts @@ -0,0 +1,111 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {backend_util, KernelConfig, KernelFunc, MaxPoolWithArgmax, MaxPoolWithArgmaxAttrs, MaxPoolWithArgmaxInputs, TensorInfo, util} from '@tensorflow/tfjs-core'; + +import {BackendWasm} from '../backend_wasm'; + +import {CppDType} from './types'; + +let wasmMaxPoolWithArgmax: ( + xId: number, pooledId: number, indexesId: number, dtype: number, + includeBatchIndex: boolean, batchSize: number, channelSize: number, + inHeight: number, inWidth: number, outHeight: number, outWidth: number, + strideHeight: number, strideWidth: number, dilationHeight: number, + dilationWidth: number, effectiveFilterHeight: number, + effectiveFilterWidth: number, padTop: number, padLeft: number) => void; + +function setup(backend: BackendWasm) { + wasmMaxPoolWithArgmax = backend.wasm.cwrap('MaxPoolWithArgmax', null, [ + 'number', // xId + 'number', // pooledId + 'number', // indexesId + 'number', // dtype + 'boolean', // includeBatchIndex + 'number', // batchSize + 'number', // channelSize + 'number', // inHeight + 'number', // inWidth + 'number', // outHeight + 'number', // outWidth + 'number', // strideHeight + 'number', // strideWidth + 'number', // dilationHeight + 'number', // dilationWidth + 'number', // effectiveFilterHeight + 'number', // effectiveFilterWidth + 'number', // padTop + 'number', // padLeft + ]); +} + +export function maxPoolWithArgmax(args: { + inputs: MaxPoolWithArgmaxInputs, + attrs: MaxPoolWithArgmaxAttrs, + backend: BackendWasm, +}): TensorInfo[] { + const {inputs, backend, attrs} = args; + const {x} = inputs; + const {filterSize, strides, pad, includeBatchInIndex} = attrs; + + util.assert( + x.shape.length === 4, + () => `Error in maxPool: input must be rank 4 but got rank ${ + x.shape.length}.`); + const dilations: [number, number] = [1, 1]; + util.assert( + backend_util.eitherStridesOrDilationsAreOne(strides, dilations), + () => 'Error in maxPool: Either strides or dilations must be 1. 
' + + `Got strides ${strides} and dilations '${dilations}'`); + + const convInfo = backend_util.computePool2DInfo( + x.shape as [number, number, number, number], filterSize, strides, [1, 1], + pad); + + const pooled = backend.makeOutput(convInfo.outShape, x.dtype); + const indexes = backend.makeOutput(convInfo.outShape, 'int32'); + + wasmMaxPoolWithArgmax( + backend.dataIdMap.get(x.dataId).id, + backend.dataIdMap.get(pooled.dataId).id, + backend.dataIdMap.get(indexes.dataId).id, + CppDType[x.dtype], + includeBatchInIndex, + convInfo.batchSize, + convInfo.inChannels, + convInfo.inHeight, + convInfo.inWidth, + convInfo.outHeight, + convInfo.outWidth, + convInfo.strideHeight, + convInfo.strideWidth, + convInfo.dilationHeight, + convInfo.dilationWidth, + convInfo.effectiveFilterHeight, + convInfo.effectiveFilterWidth, + convInfo.padInfo.top, + convInfo.padInfo.left, + ); + return [pooled, indexes]; +} + +export const maxPoolWithArgmaxConfig: KernelConfig = { + kernelName: MaxPoolWithArgmax, + backendName: 'wasm', + setupFunc: setup, + kernelFunc: maxPoolWithArgmax as unknown as KernelFunc +}; diff --git a/tfjs-backend-wasm/src/register_all_kernels.ts b/tfjs-backend-wasm/src/register_all_kernels.ts index 543cd8e753e..9b9f758c29b 100644 --- a/tfjs-backend-wasm/src/register_all_kernels.ts +++ b/tfjs-backend-wasm/src/register_all_kernels.ts @@ -104,6 +104,7 @@ import {maxPoolConfig} from './kernels/MaxPool'; import {maxPool3DConfig} from './kernels/MaxPool3D'; import {maxPool3DGradConfig} from './kernels/MaxPool3DGrad'; import {maxPoolGradConfig} from './kernels/MaxPoolGrad'; +import {maxPoolWithArgmaxConfig} from './kernels/MaxPoolWithArgmax'; import {meanConfig} from './kernels/Mean'; import {minConfig} from './kernels/Min'; import {minimumConfig} from './kernels/Minimum'; @@ -263,6 +264,7 @@ const kernelConfigs: KernelConfig[] = [ maxPool3DConfig, maxPool3DGradConfig, maxPoolGradConfig, + maxPoolWithArgmaxConfig, meanConfig, minConfig, minimumConfig, diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 4884eb5547f..0600637f84e 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -79,7 +79,6 @@ const TEST_FILTERS: TestFilter[] = [ include: 'maxPool', excludes: [ 'ignores NaNs', // Actual != expected. - 'maxPoolWithArgmax' // Not yet implemented. ] }, {include: 'cropAndResize'}, From 825b04ba06103fe437231bbf04736feef79ba664 Mon Sep 17 00:00:00 2001 From: Matthew Soulanille Date: Thu, 11 May 2023 15:48:31 -0700 Subject: [PATCH 20/32] Remove duplicate e2e test (#7682) The removed test is already present on line 29. 
--- e2e/scripts/run-browserstack-tests.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/e2e/scripts/run-browserstack-tests.sh b/e2e/scripts/run-browserstack-tests.sh index 44b31c079f1..8822421b5e9 100755 --- a/e2e/scripts/run-browserstack-tests.sh +++ b/e2e/scripts/run-browserstack-tests.sh @@ -40,7 +40,6 @@ if [[ "$NIGHTLY" = true || "$RELEASE" = true ]]; then "yarn run-browserstack --browsers=bs_ios_12 --tags '$TAGS' --testEnv webgl --flags '{\"\\"\"WEBGL_VERSION\"\\"\": 1, \"\\"\"WEBGL_CPU_FORWARD\"\\"\": false, \"\\"\"WEBGL_SIZE_UPLOAD_UNIFORM\"\\"\": 0}'" "yarn run-browserstack --browsers=bs_safari_mac --tags '$TAGS' --testEnv webgl --flags '{\"\\"\"WEBGL_VERSION\"\\"\": 1, \"\\"\"WEBGL_CPU_FORWARD\"\\"\": false, \"\\"\"WEBGL_SIZE_UPLOAD_UNIFORM\"\\"\": 0}'" "yarn run-browserstack --browsers=bs_firefox_mac --tags '$TAGS'" - "yarn run-browserstack --browsers=bs_chrome_mac --tags '$TAGS'" "yarn run-browserstack --browsers=bs_android_10 --tags '$TAGS'" # Test script tag bundles "karma start ./script_tag_tests/tfjs-core-cpu/karma.conf.js --browserstack --browsers=bs_chrome_mac" From 089efca029272e01effee673b0f0f2a5a1d2e73a Mon Sep 17 00:00:00 2001 From: Matthew Soulanille Date: Mon, 15 May 2023 14:03:03 -0700 Subject: [PATCH 21/32] Fix Verdaccio nightly e2e tests (#7644) Replace e2e's custom local publishing logic with the publish-npm script. This more accurately represents how we release TFJS. Fix publish-npm not waiting for Verdaccio to start before pushing packages. Avoid testing WebGPU on platforms other than Chrome MacOS (for now). --------- Co-authored-by: Linchenn <40653845+Linchenn@users.noreply.github.com> --- e2e/integration_tests/setup_test.ts | 3 +- e2e/scripts/local-registry.sh | 4 +- e2e/scripts/publish-tfjs-ci.sh | 92 ----------------------------- e2e/scripts/release-e2e.sh | 16 +++-- e2e/scripts/verdaccio.yaml | 4 ++ scripts/package_dependencies.json | 2 +- scripts/publish-npm.ts | 57 +++++++++++++----- scripts/release-util.ts | 58 ++++++++++++------ 8 files changed, 100 insertions(+), 136 deletions(-) delete mode 100755 e2e/scripts/publish-tfjs-ci.sh diff --git a/e2e/integration_tests/setup_test.ts b/e2e/integration_tests/setup_test.ts index 915dd68e0f9..8e319b1a6af 100644 --- a/e2e/integration_tests/setup_test.ts +++ b/e2e/integration_tests/setup_test.ts @@ -47,7 +47,8 @@ registerTestEnv({name: 'cpu', backendName: 'cpu', isDataSync: true}); // TODO: Support test windows on WebGPU. Bug: // https://github.com/tensorflow/tfjs/issues/7616. -if (navigator.platform.toUpperCase().indexOf('MAC') >= 0) { +if (navigator.platform.toUpperCase().indexOf('MAC') >= 0 && + (window as any).chrome != null) { registerTestEnv({ name: 'webgpu', backendName: 'webgpu', diff --git a/e2e/scripts/local-registry.sh b/e2e/scripts/local-registry.sh index cd9909cc42c..67b6161ddca 100644 --- a/e2e/scripts/local-registry.sh +++ b/e2e/scripts/local-registry.sh @@ -14,10 +14,10 @@ # limitations under the License. 
# ============================================================================== -custom_registry_url=http://localhost:4873 +custom_registry_url=http://127.0.0.1:4873 original_npm_registry_url=`npm get registry` original_yarn_registry_url=`yarn config get registry` -default_verdaccio_package=verdaccio@4.8.1 +default_verdaccio_package=verdaccio@5.9.0 function startLocalRegistry { # Start local registry diff --git a/e2e/scripts/publish-tfjs-ci.sh b/e2e/scripts/publish-tfjs-ci.sh deleted file mode 100755 index 98e1943c01e..00000000000 --- a/e2e/scripts/publish-tfjs-ci.sh +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Start in scripts/ even if run from root directory -cd "$(dirname "$0")" - -# Get release version from tfjs-core's package.json file. -function getReleaseVersion { - local version="" - local regex="\"version\": \"(.*)\"" - while read line - do - if [[ $line =~ $regex ]]; then - version="${BASH_REMATCH[1]}" - break - fi - done < "tfjs-core/package.json" - echo "$version" -} - -# Exit the script on any command with non 0 return code -set -e - -# Echo every command being executed -set -x - -# Go to root -cd ../../ - -# Yarn in the top-level -yarn - -RELEASE_VERSION=`getReleaseVersion` - -if [[ -z ${RELEASE_VERSION} ]]; then - echo "Expect a valid release version, but got ${RELEASE_VERSION}" - exit 1 -else - echo "Publishing version ${RELEASE_VERSION}" -fi - -# All packages to publish. This includes Bazel packages. -PACKAGES=("tfjs-core" "tfjs-backend-cpu" "tfjs-backend-webgl" \ -"tfjs-backend-wasm" "tfjs-layers" "tfjs-converter" "tfjs-data" "tfjs" \ -"tfjs-node" "tfjs-node-gpu") - -# Packages that build with Bazel -BAZEL_PACKAGES=("tfjs-core" "tfjs-backend-cpu" "tfjs-tfdf" "tfjs-tflite" -"tfjs-converter" "tfjs-backend-webgl" "tfjs-backend-webgpu" "tfjs-layers" -"tfjs-data" "tfjs-backend-wasm") - -for package in "${PACKAGES[@]}" -do - cd $package - - # tfjs-node-gpu needs to get some files from tfjs-node. - if [[ $package == "tfjs-node-gpu" ]]; then - yarn prep-gpu - fi - - # Install dependencies. - yarn - - if [[ " ${BAZEL_PACKAGES[@]} " =~ " ${package} " ]]; then - # Build and publish to local npm. - echo "Publishing $package using Bazel" - yarn publish-npm - else - echo "Publishing $package using npm" - # Build npm. - yarn build-npm for-publish - - # Publish to local npm. - npm publish - fi - echo "Published ${package}@${RELEASE_VERSION}" - - cd .. 
-done diff --git a/e2e/scripts/release-e2e.sh b/e2e/scripts/release-e2e.sh index 15096e99c1b..b87ac814b50 100755 --- a/e2e/scripts/release-e2e.sh +++ b/e2e/scripts/release-e2e.sh @@ -58,16 +58,20 @@ e2e_root_path=$PWD # Load functions for working with local NPM registry (Verdaccio) source "$e2e_root_path"/scripts/local-registry.sh -# Start the local NPM registry -startLocalRegistry "$e2e_root_path"/scripts/verdaccio.yaml - -# Publish the monorepo and update package.json tfjs dependency to the -# published version. -"$e2e_root_path"/scripts/publish-tfjs-ci.sh +# Publish the monorepo to the local NPM registry. Note this publish script will +# automatically start and stop the registry while it publishes. +cd "$e2e_root_path"/../ +yarn publish-npm --release-this-branch --dry --ci 'tfjs-core' \ + 'tfjs-backend-cpu' 'tfjs-backend-webgl' 'tfjs-backend-webgpu' \ + 'tfjs-backend-wasm' 'tfjs-layers' 'tfjs-converter' 'tfjs-data' 'tfjs' \ + 'tfjs-node' 'tfjs-node-gpu' +cd "$e2e_root_path" # **************************************************************************** # Second, install the packages from local registry and fetch golden data for testing. # **************************************************************************** +# Start the local NPM registry +startLocalRegistry "$e2e_root_path"/scripts/verdaccio.yaml # First make sure npm install succeeds. npm install rm -rf node_modules diff --git a/e2e/scripts/verdaccio.yaml b/e2e/scripts/verdaccio.yaml index 4ea6aaf1039..ff7ed606491 100644 --- a/e2e/scripts/verdaccio.yaml +++ b/e2e/scripts/verdaccio.yaml @@ -13,6 +13,10 @@ uplinks: npmjs: url: https://registry.npmjs.org/ +# Enable messaging so node can tell when verdaccio starts. +# https://verdaccio.org/docs/verdaccio-programmatically/#using-fork-from-child_process-module +_debug: true + # Fine-grained control of package access, we set it # to allow all users to read and publish all packages. 
packages: diff --git a/scripts/package_dependencies.json b/scripts/package_dependencies.json index 36d07aada6e..5cf8b6d5c0a 100644 --- a/scripts/package_dependencies.json +++ b/scripts/package_dependencies.json @@ -1,5 +1,5 @@ { - "e2e": ["tfjs", "tfjs-converter", "tfjs-node", "tfjs-backend-wasm"], + "e2e": ["tfjs", "tfjs-converter", "tfjs-node", "tfjs-backend-wasm", "tfjs-backend-webgpu"], "tfjs": ["tfjs-backend-cpu", "tfjs-backend-webgl", "tfjs-converter", "tfjs-core", "tfjs-data", "tfjs-layers"], "tfjs-automl": [], "tfjs-backend-cpu": ["tfjs-core"], diff --git a/scripts/publish-npm.ts b/scripts/publish-npm.ts index 9a6fc6d6b41..7cc6e7f7a94 100755 --- a/scripts/publish-npm.ts +++ b/scripts/publish-npm.ts @@ -55,6 +55,25 @@ async function retry(f: () => T, tries = 3, sleep=5_000): Promise { throw lastError; } +/** + * For sets `a` and `b`, compute the set difference `a \ b` + * + * The set difference of `a` and `b`, denoted `a \ b`, is the set containing all + * elements of `a` that are not in `b` + * + * @param a The set to subtract from + * @param b The set to remove from `a` when creating the output set + */ +function setDifference(a: Set, b: Set): Set { + const difference = new Set(); + for (const val of a) { + if (!b.has(val)) { + difference.add(val); + } + } + return difference; +} + const parser = new argparse.ArgumentParser(); parser.addArgument('--git-protocol', { action: 'storeTrue', @@ -89,6 +108,11 @@ parser.addArgument(['--auto-publish-local-newer'], { + ' the packages in the registry', }); +parser.addArgument(['--ci'], { + action: 'storeTrue', + help: 'Enable CI bazel flags for faster compilation. No effect on results.', +}); + parser.addArgument(['packages'], { type: 'string', nargs: '*', @@ -268,23 +292,19 @@ async function main() { // 3. Interactively on the command line. let packages: string[]; if (args.packages.length > 0) { - // Get packages to publish from args - const errorMessages: string[] = []; + // Get packages to publish from the 'packages' arg // Filter from the set of all packages to make sure they end up // in topological order. const allPackages = getPackages(PUBLISHABLE_RELEASE_UNITS); - const toPublish = new Set(args.packages); - packages = allPackages.filter(pkg => { - if (!toPublish.has(pkg)) { - errorMessages.push(`Package ${pkg} is not a tfjs package.`); - return false; - } - return true; - }) - - if (errorMessages.length > 0) { - throw new Error(errorMessages.join('\n') + - `Supported packages are:\n${[...ALL_PACKAGES].join('\n')}`); + const requestedPackages = new Set(args.packages); + packages = allPackages.filter(pkg => requestedPackages.has(pkg)); + + // Check if there are any unsupported packages requested by the user + const unsupportedPackages = setDifference(requestedPackages, + new Set(packages)); + if (unsupportedPackages.size > 0) { + throw new Error(`Can not publish ${[...unsupportedPackages]}. ` + + `Supported packages are:\n${[...ALL_PACKAGES].join('\n')}`); } } else if (args.auto_publish_local_newer) { // Automatically select packages based on npm versions @@ -323,9 +343,14 @@ async function main() { // efficiency. const bazelTargets = packages.filter(pkg => BAZEL_PACKAGES.has(pkg)) .map(name => `//${name}:${name}_pkg`); + + const bazelArgs = ['bazel', 'build'] + if (args.ci) { + bazelArgs.push('--config=ci'); + } // Use child_process.spawnSync to show bazel build progress. 
const result = child_process.spawnSync('yarn', - ['bazel', 'build', ...bazelTargets], + [...bazelArgs, ...bazelTargets], {stdio:'inherit'}); if (result.status !== 0) { throw new Error(`Bazel process failed with exit code ${result.status}`); @@ -334,7 +359,7 @@ async function main() { // Build and publish all packages to a local Verdaccio repo for staging. console.log( chalk.magenta.bold('~~~ Staging packages locally in Verdaccio ~~~')); - const verdaccio = runVerdaccio(); + const verdaccio = await runVerdaccio(); try { for (const pkg of packages) { await publish(pkg, VERDACCIO_REGISTRY); diff --git a/scripts/release-util.ts b/scripts/release-util.ts index 8b074a5aceb..e945b2b9ffa 100755 --- a/scripts/release-util.ts +++ b/scripts/release-util.ts @@ -19,17 +19,20 @@ import chalk from 'chalk'; import * as fs from 'fs'; import * as inquirer from 'inquirer'; -import { Separator } from 'inquirer'; +import {Separator} from 'inquirer'; import mkdirp from 'mkdirp'; import * as readline from 'readline'; import * as shell from 'shelljs'; import rimraf from 'rimraf'; import * as path from 'path'; +import {ChildProcess, fork} from 'child_process'; export interface Phase { // The list of packages that will be updated with this change. packages: string[]; // The list of dependencies that all of the packages will update to. + // TODO(mattSoulanille): Parse this from package_dependencies.json or from the + // package.json file of each package. deps?: string[]; // An ordered map of scripts, key is package name, value is an object with two // optional fields: `before-yarn` with scripts to run before `yarn`, and @@ -146,7 +149,8 @@ export const E2E_PHASE: Phase = { packages: ['e2e'], deps: [ 'tfjs', 'tfjs-backend-cpu', 'tfjs-backend-wasm', 'tfjs-backend-webgl', - 'tfjs-converter', 'tfjs-core', 'tfjs-data', 'tfjs-layers', 'tfjs-node' + 'tfjs-backend-webgpu', 'tfjs-converter', 'tfjs-core', 'tfjs-data', + 'tfjs-layers', 'tfjs-node' ], } @@ -617,28 +621,46 @@ export function memoize(f: (arg: I) => Promise): (arg: I) => Promise } } -export function runVerdaccio() { +export async function runVerdaccio(): Promise { // Remove the verdaccio package store. // TODO(mattsoulanille): Move the verdaccio storage and config file here // once the nightly verdaccio tests are handled by this script. rimraf.sync(path.join(__dirname, '../e2e/scripts/storage')); - // Start verdaccio. - const serverProcess = shell.exec( - 'yarn verdaccio --config=e2e/scripts/verdaccio.yaml', - { - async: true, - silent: true, - cwd: path.join(__dirname, '../'), - }, - (code, stdout, stderr) => { - if (code !== 0) { - console.log(`Verdaccio stopped with exit code ${code}`); - console.log(stdout); - console.log(stderr); - } + + // Start verdaccio. It must be started directly from its binary so that IPC + // messaging works and verdaccio can tell node that it has started. 
+ // https://verdaccio.org/docs/verdaccio-programmatically/#using-fork-from-child_process-module + const verdaccioBin = require.resolve('verdaccio/bin/verdaccio'); + const serverProcess = fork(verdaccioBin, ['--config=e2e/scripts/verdaccio.yaml']); + const ready = new Promise((resolve, reject) => { + const timeLimitMilliseconds = 30_000; + console.log(`Waiting ${timeLimitMilliseconds / 1000} seconds for ` + + 'verdaccio to start....'); + const timeout = setTimeout(() => { + serverProcess.kill(); + reject(`Verdaccio did not start in ${timeLimitMilliseconds} seconds.`); + }, timeLimitMilliseconds); + + serverProcess.on('message', (msg: {verdaccio_started: boolean}) => { + if (msg.verdaccio_started) { + console.log('Verdaccio Started.'); + clearTimeout(timeout); + resolve(); } - ); + }); + }); + + serverProcess.on('error', (err: unknown) => { + throw new Error(`Verdaccio error: ${err}`); + }); + serverProcess.on('disconnect', (err: unknown) => { + throw new Error(`Verdaccio disconnected: ${err}`); + }); + + // Kill verdaccio when node exits. process.on('exit', () => {serverProcess.kill();}); + + await ready; return serverProcess; } From 4cabd2c9a4c325c8950d2f2b5d1cd7d374dfd34c Mon Sep 17 00:00:00 2001 From: Matthew Soulanille Date: Tue, 16 May 2023 16:46:04 -0700 Subject: [PATCH 22/32] Run yarn before running the release e2e tests (#7687) --- cloudbuild-release.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cloudbuild-release.yml b/cloudbuild-release.yml index 588d670bd5a..89b4e24b914 100644 --- a/cloudbuild-release.yml +++ b/cloudbuild-release.yml @@ -1,5 +1,11 @@ steps: +# Install top-level deps. +- name: 'gcr.io/learnjs-174218/release' + entrypoint: 'yarn' + id: 'yarn-common' + args: ['install'] + # Release e2e flow. - name: 'gcr.io/learnjs-174218/release' dir: 'e2e' From 03c60538044c6a249a5ca787712618078c7dbe82 Mon Sep 17 00:00:00 2001 From: Linchenn <40653845+Linchenn@users.noreply.github.com> Date: Wed, 17 May 2023 11:19:33 -0700 Subject: [PATCH 23/32] Improve the code snippet for texture to tensor (#7694) DOC * Improve example * add --- tfjs-core/src/ops/tensor.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index d3bba0ccef4..a495e7523b5 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -50,11 +50,13 @@ import {makeTensor} from './tensor_ops_util'; * // downloading the values. 
* * // Example for WebGL2: - * const customCanvas = document.createElement('canvas'); - * const customBackend = new tf.MathBackendWebGL(customCanvas); - * tf.registerBackend('custom-webgl', () => customBackend); + * if (tf.findBackend('custom-webgl') == null) { + * const customCanvas = document.createElement('canvas'); + * const customBackend = new tf.MathBackendWebGL(customCanvas); + * tf.registerBackend('custom-webgl', () => customBackend); + * } * await tf.setBackend('custom-webgl'); - * const gl = customBackend.gpgpu.gl; + * const gl = tf.backend().gpgpu.gl; * const texture = gl.createTexture(); * const tex2d = gl.TEXTURE_2D; * const width = 2; @@ -81,6 +83,7 @@ import {makeTensor} from './tensor_ops_util'; * * const logicalShape = [height * width * 2]; * const a = tf.tensor({texture, height, width, channels: 'BR'}, logicalShape); + * a.print(); * // Tensor value will be [2, 0, 6, 4, 10, 8, 14, 12], since [2, 0] is the * // values of 'B' and 'R' channels of Pixel0, [6, 4] is the values of 'B' and * 'R' @@ -158,6 +161,7 @@ import {makeTensor} from './tensor_ops_util'; * const a = tf.tensor({buffer: aBuffer}, shape, dtype); * const b = tf.tensor(bData, shape, dtype); * const result = tf.add(a, b); + * result.print(); * a.dispose(); * b.dispose(); * result.dispose(); From de94eec1990306fcc80f9ff1fd9995ff5621418c Mon Sep 17 00:00:00 2001 From: Matthew Soulanille Date: Wed, 17 May 2023 11:50:25 -0700 Subject: [PATCH 24/32] Fix tfjs-release not updating all tfjs versions of subpackages (#7550) Some TFJS packages, like wasm, have examples or demos in them. These usually depend on the parent package, but the parent package is not marked as to be updated when updating the subpackage dependency versions. For an example of this, see #7547. Update the TFJS dependencies of these subpackages to the release version if they are `link:` dependencies. 
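In rough terms, the version-update step walks every package.json it finds under a package and rewrites `@tensorflow/*` entries whose current value is a `link:` path. Below is a minimal TypeScript sketch of that idea only — the helper name `rewriteLinkDeps` and the `versions` map keyed by short package name are hypothetical, and the real change in scripts/release-util.ts further down edits the raw package.json text with a regex instead, so formatting and `~`/`^` prefixes are preserved:

```ts
// Sketch only: rewrite `link:` tfjs dependencies to the chosen release versions.
function rewriteLinkDeps(pkgJson: string, versions: Map<string, string>): string {
  const pkg = JSON.parse(pkgJson);
  for (const field of ['dependencies', 'peerDependencies', 'devDependencies']) {
    const deps: Record<string, string> = pkg[field];
    if (deps == null) {
      continue;
    }
    for (const [name, version] of Object.entries(deps)) {
      if (name.startsWith('@tensorflow/') && version.startsWith('link:')) {
        const newVersion = versions.get(name.slice('@tensorflow/'.length));
        if (newVersion == null) {
          throw new Error(`No release version found for ${name}`);
        }
        deps[name] = newVersion;
      }
    }
  }
  return JSON.stringify(pkg, null, 2);
}

// Example usage (placeholder version string, for illustration only):
// rewriteLinkDeps(fs.readFileSync('package.json', 'utf8'),
//                 new Map([['tfjs-core', '0.0.0-placeholder']]));
```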
--- .../browserstack-benchmark/package.json | 2 +- .../browserstack-benchmark/yarn.lock | 123 +++++++++----- scripts/generate_cloudbuild.ts | 136 +--------------- scripts/graph_utils.ts | 150 ++++++++++++++++++ scripts/release-tfjs.ts | 49 +++++- scripts/release-util.ts | 58 +++---- 6 files changed, 302 insertions(+), 216 deletions(-) create mode 100644 scripts/graph_utils.ts diff --git a/e2e/benchmarks/browserstack-benchmark/package.json b/e2e/benchmarks/browserstack-benchmark/package.json index 3d33d4898cb..5f208b47bcd 100644 --- a/e2e/benchmarks/browserstack-benchmark/package.json +++ b/e2e/benchmarks/browserstack-benchmark/package.json @@ -10,7 +10,6 @@ "devDependencies": { "@tensorflow/tfjs": "link:../../../tfjs", "@tensorflow/tfjs-backend-wasm": "link:../../../link-package/node_modules/@tensorflow/tfjs-backend-wasm", - "@tensorflow/tfjs-vis": "link:../../../tfjs-vis", "argparse": "^2.0.1", "firebase-admin": "^11.0.1", "jasmine": "^3.7.0", @@ -45,6 +44,7 @@ "minimist": "1.2.6" }, "dependencies": { + "@tensorflow/tfjs-vis": "^1.5.1", "JSONStream": "^1.3.5" } } diff --git a/e2e/benchmarks/browserstack-benchmark/yarn.lock b/e2e/benchmarks/browserstack-benchmark/yarn.lock index 77070f2e9ff..43d9360ff97 100644 --- a/e2e/benchmarks/browserstack-benchmark/yarn.lock +++ b/e2e/benchmarks/browserstack-benchmark/yarn.lock @@ -216,41 +216,41 @@ resolved "https://registry.yarnpkg.com/@socket.io/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz#568d9beae00b0d835f4f8c53fd55714986492e61" integrity sha512-dOlCBKnDw4iShaIsH/bxujKTM18+2TOAsYz+KSc11Am38H4q5Xw8Bbz97ZYdrVNM+um3p7w86Bvvmcn9q+5+eQ== -"@tensorflow/tfjs-backend-cpu@file:../../../link-package/node_modules/@tensorflow/tfjs-backend-cpu": +"@tensorflow/tfjs-backend-cpu@link:../../../link-package/node_modules/@tensorflow/link-package/node_modules/@tensorflow/tfjs-backend-cpu": version "0.0.0" - dependencies: - "@types/seedrandom" "2.4.27" - seedrandom "2.4.3" "@tensorflow/tfjs-backend-cpu@link:../../../link-package/node_modules/@tensorflow/tfjs-backend-cpu": version "0.0.0" - dependencies: - "@types/seedrandom" "2.4.27" - seedrandom "2.4.3" + uid "" "@tensorflow/tfjs-backend-wasm@link:../../../link-package/node_modules/@tensorflow/tfjs-backend-wasm": version "0.0.0" - dependencies: - "@tensorflow/tfjs-backend-cpu" "file:../../../link-package/node_modules/@tensorflow/tfjs-backend-cpu" - "@types/emscripten" "~0.0.34" + uid "" "@tensorflow/tfjs-backend-webgl@link:../../../link-package/node_modules/@tensorflow/tfjs-backend-webgl": version "0.0.0" + uid "" "@tensorflow/tfjs-converter@link:../../../link-package/node_modules/@tensorflow/tfjs-converter": version "0.0.0" + uid "" "@tensorflow/tfjs-core@link:../../../link-package/node_modules/@tensorflow/tfjs-core": version "0.0.0" + uid "" "@tensorflow/tfjs-data@link:../../../link-package/node_modules/@tensorflow/tfjs-data": version "0.0.0" + uid "" "@tensorflow/tfjs-layers@link:../../../link-package/node_modules/@tensorflow/tfjs-layers": version "0.0.0" + uid "" -"@tensorflow/tfjs-vis@link:../../../tfjs-vis": - version "1.4.3" +"@tensorflow/tfjs-vis@^1.5.1": + version "1.5.1" + resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-vis/-/tfjs-vis-1.5.1.tgz#e959832ee42ed99f71b073a3e6cf8bbfe136e589" + integrity sha512-oNithKiR7VZaE+xUvz6Leww4TYEPhKi8j5xnEYvT3j7brK2Njdvril7UgFtZ8EYZBdeX6XNim5Eu3/23gTQ1dA== dependencies: d3-format "~1.3.0" d3-selection "~1.3.0" @@ -262,18 +262,7 @@ "@tensorflow/tfjs@link:../../../tfjs": version "0.0.0" - dependencies: - "@tensorflow/tfjs-backend-cpu" 
"link:../../../link-package/node_modules/@tensorflow/tfjs-backend-cpu" - "@tensorflow/tfjs-backend-webgl" "link:../../../link-package/node_modules/@tensorflow/tfjs-backend-webgl" - "@tensorflow/tfjs-converter" "link:../../../link-package/node_modules/@tensorflow/tfjs-converter" - "@tensorflow/tfjs-core" "link:../../../link-package/node_modules/@tensorflow/tfjs-core" - "@tensorflow/tfjs-data" "link:../../../link-package/node_modules/@tensorflow/tfjs-data" - "@tensorflow/tfjs-layers" "link:../../../link-package/node_modules/@tensorflow/tfjs-layers" - argparse "^1.0.10" - chalk "^4.1.0" - core-js "3" - regenerator-runtime "^0.13.5" - yargs "^16.0.3" + uid "" "@tootallnate/once@2": version "2.0.0" @@ -386,6 +375,14 @@ resolved "https://registry.yarnpkg.com/@types/mime/-/mime-3.0.1.tgz#5f8f2bca0a5863cb69bc0b0acd88c96cb1d4ae10" integrity sha512-Y4XFY5VJAuw0FgAqPNd6NNoV44jbq9Bz2L7Rh/J6jLTiHBSBJa9fxqQIvkIld4GsoDOcCbvzOUAbLPsSKKg+uA== +"@types/node-fetch@^2.1.2": + version "2.6.3" + resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.3.tgz#175d977f5e24d93ad0f57602693c435c57ad7e80" + integrity sha512-ETTL1mOEdq/sxUtgtOhKjyB2Irra4cjxksvcMUR5Zr4n+PxVhsCD9WS46oPbHL3et9Zde7CNRr+WUNlcHvsX+w== + dependencies: + "@types/node" "*" + form-data "^3.0.0" + "@types/node@*": version "18.11.9" resolved "https://registry.yarnpkg.com/@types/node/-/node-18.11.9.tgz#02d013de7058cea16d36168ef2fc653464cfbad4" @@ -396,6 +393,16 @@ resolved "https://registry.yarnpkg.com/@types/node/-/node-17.0.38.tgz#f8bb07c371ccb1903f3752872c89f44006132947" integrity sha512-5jY9RhV7c0Z4Jy09G+NIDTsCZ5G0L5n+Z+p+Y7t5VJHM30bgwzSjVtlcBxqAj+6L/swIlvtOSzr8rBk/aNyV2g== +"@types/offscreencanvas@~2019.3.0": + version "2019.3.0" + resolved "https://registry.yarnpkg.com/@types/offscreencanvas/-/offscreencanvas-2019.3.0.tgz#3336428ec7e9180cf4566dfea5da04eb586a6553" + integrity sha512-esIJx9bQg+QYF0ra8GnvfianIY8qWB0GBx54PK5Eps6m+xTj86KLavHv6qDhzKcu5UUOgNfJ2pWaIIV7TRUd9Q== + +"@types/offscreencanvas@~2019.7.0": + version "2019.7.0" + resolved "https://registry.yarnpkg.com/@types/offscreencanvas/-/offscreencanvas-2019.7.0.tgz#e4a932069db47bb3eabeb0b305502d01586fa90d" + integrity sha512-PGcyveRIpL1XIqK8eBsmRBt76eFgtzuPiSTyKHZxnGemp2yzGzWpjYKAfK3wIMiU7eH+851yEpiuP8JZerTmWg== + "@types/qs@*": version "6.9.7" resolved "https://registry.yarnpkg.com/@types/qs/-/qs-6.9.7.tgz#63bb7d067db107cc1e457c303bc25d511febf6cb" @@ -406,10 +413,10 @@ resolved "https://registry.yarnpkg.com/@types/range-parser/-/range-parser-1.2.4.tgz#cd667bcfdd025213aafb7ca5915a932590acdcdc" integrity sha512-EEhsLsD6UsDM1yFhAvy0Cjr6VwmpMWqFBCb9w07wVugF7w9nfajxLuVmngTIpgS6svCnm6Vaw+MZhoDCKnOfsw== -"@types/seedrandom@2.4.27": - version "2.4.27" - resolved "https://registry.yarnpkg.com/@types/seedrandom/-/seedrandom-2.4.27.tgz#9db563937dd86915f69092bc43259d2f48578e41" - integrity sha1-nbVjk33YaRX2kJK8QyWdL0hXjkE= +"@types/seedrandom@^2.4.28": + version "2.4.30" + resolved "https://registry.yarnpkg.com/@types/seedrandom/-/seedrandom-2.4.30.tgz#d2efe425869b84163c2d56e779dddadb9372cbfa" + integrity sha512-AnxLHewubLVzoF/A4qdxBGHCKifw8cY32iro3DQX9TPcetE95zBeVt3jnsvtvAUf1vwzMfwzp4t/L2yqPlnjkQ== "@types/serve-static@*": version "1.15.0" @@ -419,6 +426,16 @@ "@types/mime" "*" "@types/node" "*" +"@types/webgl-ext@0.0.30": + version "0.0.30" + resolved "https://registry.yarnpkg.com/@types/webgl-ext/-/webgl-ext-0.0.30.tgz#0ce498c16a41a23d15289e0b844d945b25f0fb9d" + integrity sha512-LKVgNmBxN0BbljJrVUwkxwRYqzsAEPcZOe6S2T6ZaBDIrFp0qu4FNlpc5sM1tGbXUYFgdVQIoeLk1Y1UoblyEg== + 
+"@webgpu/types@0.1.21": + version "0.1.21" + resolved "https://registry.yarnpkg.com/@webgpu/types/-/types-0.1.21.tgz#b181202daec30d66ccd67264de23814cfd176d3a" + integrity sha512-pUrWq3V5PiSGFLeLxoGqReTZmiiXwY3jRkIG5sLLKjyqNxrwm/04b4nw7LSmGWJcKk59XOM/YRTUwOzo4MMlow== + JSONStream@^1.3.5: version "1.3.5" resolved "https://registry.yarnpkg.com/JSONStream/-/JSONStream-1.3.5.tgz#3208c1f08d3a4d99261ab64f92302bc15e111ca0" @@ -525,6 +542,11 @@ async-retry@^1.3.3: dependencies: retry "0.13.1" +asynckit@^0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79" + integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q== + balanced-match@^1.0.0: version "1.0.2" resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" @@ -695,6 +717,13 @@ color-name@~1.1.4: resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== +combined-stream@^1.0.8: + version "1.0.8" + resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" + integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg== + dependencies: + delayed-stream "~1.0.0" + commander@2: version "2.20.3" resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" @@ -742,10 +771,10 @@ cookie@~0.4.1: resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.4.2.tgz#0e41f24de5ecf317947c82fc789e06a884824432" integrity "sha1-DkHyTeXs8xeUfIL8eJ4GqISCRDI= sha512-aSWTXFzaKWkvHO1Ny/s+ePFpvKsPnjc551iI41v3ny/ow6tBG5Vd+FuqGNhh1LxOmVzOlGUriIlOaokOvhaStA==" -core-js@3: - version "3.21.1" - resolved "https://registry.yarnpkg.com/core-js/-/core-js-3.21.1.tgz#f2e0ddc1fc43da6f904706e8e955bc19d06a0d94" - integrity sha512-FRq5b/VMrWlrmCzwRrpDYNxyHP9BcAZC+xHJaqTgIE5091ZV1NTmyh0sGOg5XqpnHvR0svdy0sv1gWA1zmhxig== +core-js@3.29.1: + version "3.29.1" + resolved "https://registry.yarnpkg.com/core-js/-/core-js-3.29.1.tgz#40ff3b41588b091aaed19ca1aa5cb111803fa9a6" + integrity sha512-+jwgnhg6cQxKYIIjGtAHq2nwUOolo9eoFZ4sHfUH09BLXBgxnH4gA0zEd+t+BO2cNB8idaBtZFcFTRjQJRJmAw== core-js@^1.0.0: version "1.2.7" @@ -1033,6 +1062,11 @@ delaunator@4: resolved "https://registry.yarnpkg.com/delaunator/-/delaunator-4.0.1.tgz#3d779687f57919a7a418f8ab947d3bddb6846957" integrity sha512-WNPWi1IRKZfCt/qIDMfERkDp93+iZEmOxN2yy4Jg+Xhv8SLk2UTqqbe1sfiipn0and9QrE914/ihdx82Y/Giag== +delayed-stream@~1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619" + integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ== + depd@~1.1.2: version "1.1.2" resolved "https://registry.yarnpkg.com/depd/-/depd-1.1.2.tgz#9bcd52e14c097763e749b274c4346ed2e560b5a9" @@ -1331,6 +1365,15 @@ follow-redirects@^1.0.0: resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.9.tgz#dd4ea157de7bfaf9ea9b3fbd85aa16951f78d8d7" integrity sha512-MQDfihBQYMcyy5dhRDJUHcw7lb2Pv/TuE6xP1vyraLukNDHKbDxDNaOE3NbCAdKQApno+GPRyo1YAp89yCjK4w== +form-data@^3.0.0: + version "3.0.1" + resolved 
"https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f" + integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg== + dependencies: + asynckit "^0.4.0" + combined-stream "^1.0.8" + mime-types "^2.1.12" + from@~0: version "0.1.7" resolved "https://registry.yarnpkg.com/from/-/from-0.1.7.tgz#83c60afc58b9c56997007ed1a768b3ab303a44fe" @@ -1978,7 +2021,7 @@ log4js@^6.4.1: rfdc "^1.3.0" streamroller "^3.0.5" -long@^4.0.0: +long@4.0.0, long@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/long/-/long-4.0.0.tgz#9a7b71cfb7d361a194ea555241c92f7468d5bf28" integrity sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA== @@ -2059,7 +2102,7 @@ mime-db@1.52.0, "mime-db@>= 1.43.0 < 2": resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70" integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== -mime-types@^2.0.8, mime-types@~2.1.24, mime-types@~2.1.34: +mime-types@^2.0.8, mime-types@^2.1.12, mime-types@~2.1.24, mime-types@~2.1.34: version "2.1.35" resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a" integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== @@ -2127,7 +2170,7 @@ negotiator@0.6.3: resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.3.tgz#58e323a72fedc0d6f9cd4d31fe49f51479590ccd" integrity sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg== -node-fetch@2.6.7, node-fetch@^1.0.1, node-fetch@^2.6.1, node-fetch@^2.6.7: +node-fetch@2.6.7, node-fetch@^1.0.1, node-fetch@^2.6.1, node-fetch@^2.6.7, node-fetch@~2.6.1: version "2.6.7" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad" integrity sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ== @@ -2451,10 +2494,10 @@ safe-buffer@>=5.1.0, safe-buffer@^5.0.1, safe-buffer@~5.2.0: resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== -seedrandom@2.4.3: - version "2.4.3" - resolved "https://registry.yarnpkg.com/seedrandom/-/seedrandom-2.4.3.tgz#2438504dad33917314bff18ac4d794f16d6aaecc" - integrity sha1-JDhQTa0zkXMUv/GKxNeU8W1qrsw= +seedrandom@^3.0.5: + version "3.0.5" + resolved "https://registry.yarnpkg.com/seedrandom/-/seedrandom-3.0.5.tgz#54edc85c95222525b0c7a6f6b3543d8e0b3aa0a7" + integrity sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg== semver@^5.6.0: version "5.7.1" @@ -2575,7 +2618,7 @@ string-width@^4.1.0, string-width@^4.2.0: is-fullwidth-code-point "^3.0.0" strip-ansi "^6.0.1" -string_decoder@^1.1.1: +string_decoder@^1.1.1, string_decoder@^1.3.0: version "1.3.0" resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e" integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== diff --git a/scripts/generate_cloudbuild.ts b/scripts/generate_cloudbuild.ts index de9699fa29f..d9450cf8bfb 100644 --- a/scripts/generate_cloudbuild.ts +++ b/scripts/generate_cloudbuild.ts @@ -17,146 +17,12 @@ import 
{printTable} from 'console-table-printer'; import * as fs from 'fs'; import * as yaml from 'js-yaml'; import * as path from 'path'; - import {BAZEL_PACKAGES} from './bazel_packages'; - - -const DEPENDENCY_GRAPH = JSON.parse( - fs.readFileSync(path.join(__dirname, 'package_dependencies.json'), 'utf8')); - -// This is a reverse dependencies graph. Each entry in the graph lists the -// packages that depend on it. -const REVERSE_DEPENDENCY_GRAPH = transposeGraph(DEPENDENCY_GRAPH); - -// Topologically sort the dependency tree and arrange -// steps in dependency order. -const DEPENDENCY_ORDER = topologicalSort(DEPENDENCY_GRAPH); +import {DEPENDENCY_GRAPH, DEPENDENCY_ORDER, findDeps, findReverseDeps} from './graph_utils'; // Steps to exclude from cloudbuild files. const EXCLUDE_STEPS = new Set(['build-deps', 'yarn-common']); -type Graph = Iterable> = { - [node: string]: V -} - -/** - * Verify that an object is a valid graph. - */ -function verifyGraph(graph: Graph) { - const nodes = new Set(Object.keys(graph)); - for (const [node, edges] of Object.entries(graph)) { - for (const edge of edges) { - if (!nodes.has(edge)) { - throw new Error( - `Graph edge ${edge} of node ${node} not found in the graph`); - } - } - } -} - -/** - * Transpose a directed graph i.e. reverse the direction of the edges. - */ -function transposeGraph(graph: Graph) { - verifyGraph(graph); - const transposed: Graph> = {}; - for (const [nodeName, connectedNodes] of Object.entries(graph)) { - for (const connectedNode of connectedNodes) { - if (!transposed[connectedNode]) { - transposed[connectedNode] = new Set(); - } - if (!transposed[nodeName]) { - // Make sure the node itself ends up in the transposed graph. - transposed[nodeName] = new Set(); - } - transposed[connectedNode].add(nodeName); - } - } - return transposed; -} - -/** - * Topologically sort a directed acyclic graph. - * - * Returns a list of graph nodes such that, by following edges, - * you can only move forward in the list, not backward. - */ -function topologicalSort(graph: Graph) { - // We can't use a standard sorting algorithm because - // often, two packages won't have any dependency relationship - // between each other, meaning they are incomparable. - verifyGraph(graph); - const sorted: string[] = []; - - while (sorted.length < Object.keys(graph).length) { - // Find nodes not yet in 'sorted' that have edges - // only to nodes already in 'sorted' - const emptyNodes = Object.entries(graph) - .filter(([node, edges]) => { - if (sorted.includes(node)) { - return false; - } - for (const edge of edges) { - if (!sorted.includes(edge)) { - return false; - } - } - return true; - }) - .map(([node, edges]) => node); - - // If there are no such nodes, then the graph has a cycle. - if (emptyNodes.length === 0) { - throw new Error('Dependency graph has a cycle.'); - } - - for (let node of emptyNodes) { - sorted.push(node); - } - } - return sorted; -} - -/** - * Find all subnodes in the subgraph generated by taking the transitive - * closure at `node`. - */ -function findSubgraph(node: string, graph: Graph, subnodes = new Set()) { - const directSubnodes = graph[node]; - if (directSubnodes) { - for (const directSubnode of directSubnodes) { - if (!subnodes.has(directSubnode)) { - subnodes.add(directSubnode); - findSubgraph(directSubnode, graph, subnodes); - } - } - } - - return subnodes; -} - -/** - * Find the transitive closure of dependencies of the given packages. 
- */ -function findDeps(packages: Iterable) { - return new Set( - [...packages] - .map(packageName => findSubgraph(packageName, DEPENDENCY_GRAPH)) - .reduce((a, b) => [...a, ...b], [])); -} - -/** - * Find the reverse dependencies of the given packages, i.e. find the - * set of packages that include at least one of the given packages in - * their transitive closure of dependencies. - */ -function findReverseDeps(packages: Iterable) { - return new Set([ - ...packages - ].map(packageName => findSubgraph(packageName, REVERSE_DEPENDENCY_GRAPH)) - .reduce((a, b) => [...a, ...b], [])); -} - interface CloudbuildStep { name: string, id: string, diff --git a/scripts/graph_utils.ts b/scripts/graph_utils.ts new file mode 100644 index 00000000000..f19b0263369 --- /dev/null +++ b/scripts/graph_utils.ts @@ -0,0 +1,150 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +import * as fs from 'fs'; +import * as path from 'path'; + +export const DEPENDENCY_GRAPH = JSON.parse( + fs.readFileSync(path.join(__dirname, 'package_dependencies.json'), 'utf8')); + +// This is a reverse dependencies graph. Each entry in the graph lists the +// packages that depend on it. +export const REVERSE_DEPENDENCY_GRAPH = transposeGraph(DEPENDENCY_GRAPH); + +// Topologically sort the dependency tree and arrange +// steps in dependency order. +export const DEPENDENCY_ORDER = topologicalSort(DEPENDENCY_GRAPH); + +export type Graph = Iterable> = { + [node: string]: V +} + +/** + * Verify that an object is a valid graph. + */ +export function verifyGraph(graph: Graph) { + const nodes = new Set(Object.keys(graph)); + for (const [node, edges] of Object.entries(graph)) { + for (const edge of edges) { + if (!nodes.has(edge)) { + throw new Error( + `Graph edge ${edge} of node ${node} not found in the graph`); + } + } + } +} + +/** + * Transpose a directed graph i.e. reverse the direction of the edges. + */ +export function transposeGraph(graph: Graph) { + verifyGraph(graph); + const transposed: Graph> = {}; + for (const [nodeName, connectedNodes] of Object.entries(graph)) { + for (const connectedNode of connectedNodes) { + if (!transposed[connectedNode]) { + transposed[connectedNode] = new Set(); + } + if (!transposed[nodeName]) { + // Make sure the node itself ends up in the transposed graph. + transposed[nodeName] = new Set(); + } + transposed[connectedNode].add(nodeName); + } + } + return transposed; +} + +/** + * Topologically sort a directed acyclic graph. + * + * Returns a list of graph nodes such that, by following edges, + * you can only move forward in the list, not backward. + */ +export function topologicalSort(graph: Graph) { + // We can't use a standard sorting algorithm because + // often, two packages won't have any dependency relationship + // between each other, meaning they are incomparable. 
+ verifyGraph(graph); + const sorted: string[] = []; + + while (sorted.length < Object.keys(graph).length) { + // Find nodes not yet in 'sorted' that have edges + // only to nodes already in 'sorted' + const emptyNodes = Object.entries(graph) + .filter(([node, edges]) => { + if (sorted.includes(node)) { + return false; + } + for (const edge of edges) { + if (!sorted.includes(edge)) { + return false; + } + } + return true; + }) + .map(([node, edges]) => node); + + // If there are no such nodes, then the graph has a cycle. + if (emptyNodes.length === 0) { + throw new Error('Dependency graph has a cycle.'); + } + + for (let node of emptyNodes) { + sorted.push(node); + } + } + return sorted; +} + +/** + * Find all subnodes in the subgraph generated by taking the transitive + * closure at `node`. + */ +export function findSubgraph(node: string, graph: Graph, subnodes = new Set()) { + const directSubnodes = graph[node]; + if (directSubnodes) { + for (const directSubnode of directSubnodes) { + if (!subnodes.has(directSubnode)) { + subnodes.add(directSubnode); + findSubgraph(directSubnode, graph, subnodes); + } + } + } + + return subnodes; +} + +/** + * Find the transitive closure of dependencies of the given packages. + */ +export function findDeps(packages: Iterable): Set { + return new Set( + [...packages] + .map(packageName => findSubgraph(packageName, DEPENDENCY_GRAPH)) + .reduce((a, b) => [...a, ...b], [])); +} + +/** + * Find the reverse dependencies of the given packages, i.e. find the + * set of packages that include at least one of the given packages in + * their transitive closure of dependencies. + */ +export function findReverseDeps(packages: Iterable): Set { + return new Set( + [...packages] + .map(packageName => findSubgraph(packageName, REVERSE_DEPENDENCY_GRAPH)) + .reduce((a, b) => [...a, ...b], [])); +} diff --git a/scripts/release-tfjs.ts b/scripts/release-tfjs.ts index e9c2a99620d..c4c37251f33 100644 --- a/scripts/release-tfjs.ts +++ b/scripts/release-tfjs.ts @@ -30,6 +30,7 @@ import * as fs from 'fs'; import * as shell from 'shelljs'; import {TMP_DIR, $, question, makeReleaseDir, createPR, TFJS_RELEASE_UNIT, updateTFJSDependencyVersions, ALPHA_RELEASE_UNIT, getMinorUpdateVersion, getPatchUpdateVersion, E2E_PHASE, getReleaseBlockers, getNightlyVersion} from './release-util'; import * as path from 'path'; +import {findDeps} from './graph_utils'; const parser = new argparse.ArgumentParser({ description: 'Create a release PR for the tfjs monorepo.', @@ -139,8 +140,9 @@ async function main() { } // Guess release version from tfjs-core's latest version, with a minor update. - const newVersion = await getNewVersion('tfjs-core', - incrementVersion ?? getMinorUpdateVersion, !args.guess_version); + const newVersion = await getNewVersion( + 'tfjs-core', incrementVersion ?? getMinorUpdateVersion, + !args.guess_version); // Populate the versions map with new versions for monorepo packages. const versions = new Map(); @@ -154,8 +156,9 @@ async function main() { // version as the other monorepo packages. for (const phase of ALPHA_RELEASE_UNIT.phases) { for (const packageName of phase.packages) { - const newVersion = await getNewVersion(packageName, - incrementVersion ?? getPatchUpdateVersion, !args.guess_version); + const newVersion = await getNewVersion( + packageName, incrementVersion ?? getPatchUpdateVersion, + !args.guess_version); versions.set(packageName, newVersion); } } @@ -203,6 +206,7 @@ async function main() { // Update versions in package.json files. 
const phases = [...TFJS_RELEASE_UNIT.phases, ...ALPHA_RELEASE_UNIT.phases, E2E_PHASE]; + const errors: Error[] = []; for (const phase of phases) { for (const packageName of phase.packages) { shell.cd(packageName); @@ -223,14 +227,39 @@ async function main() { // Update dependency versions of all package.json files found in the // package to use the new verison numbers (except ones in node_modules). const subpackages = - $(`find ${packagePath} -name package.json -not -path \'*/node_modules/*\'`) - .split('\n'); + $(`find ${ + packagePath} -name package.json -not -path \'*/node_modules/*\'`) + .split('\n'); for (const packageJsonPath of subpackages) { const pkg = fs.readFileSync(packageJsonPath, 'utf8'); console.log(chalk.magenta.bold( `~~~ Update dependency versions for ${packageJsonPath} ~~~`)); - const updated = updateTFJSDependencyVersions(pkg, versions, phase.deps || []); - fs.writeFileSync(packageJsonPath, updated); + + // Only update versions that are a (possibly transitive) dependency of + // the package and are listed in the phase deps (we throw an error + // if we find a dependency that doesn't satisfy these conditions). + const transitiveDeps = [...findDeps([packageName])].filter( + dep => phase.deps.includes(dep)); + + // Also add the package itself so subpackages can use it. + // Some packages, like e2e, are never published to npm, so check first. + if (versions.has(packageName)) { + transitiveDeps.push(packageName); + } + + const packageDependencyVersions = + new Map(transitiveDeps.map(dep => [dep, versions.get(dep)!])); + + try { + const updated = + updateTFJSDependencyVersions(pkg, packageDependencyVersions); + + fs.writeFileSync(packageJsonPath, updated); + } catch (e) { + e.message = `For ${packageJsonPath}, ${packageName} ${e.message}`; + console.error(e.stack); + errors.push(e); + } } shell.cd('..'); @@ -241,6 +270,10 @@ async function main() { } } } + if (errors.length > 0) { + throw new Error('Some package version updates had errors' + errors); + } + // Use dev prefix to avoid branch being locked. const devBranchName = `dev_${releaseBranch}`; diff --git a/scripts/release-util.ts b/scripts/release-util.ts index e945b2b9ffa..57914363e1e 100755 --- a/scripts/release-util.ts +++ b/scripts/release-util.ts @@ -355,40 +355,34 @@ export function updateTFJSDependencyVersions( const parsedPkg = JSON.parse(pkg); - for (const dep of depsToReplace) { - const newVersion = versions.get(dep); - if (!newVersion) { - throw new Error(`No new version found for ${dep}`); - } - // Get the current dependency package version. 
- let version = ''; - const depNpmName = `@tensorflow/${dep}`; - if (parsedPkg['dependencies'] != null && - parsedPkg['dependencies'][depNpmName] != null) { - version = parsedPkg['dependencies'][depNpmName]; - } else if ( - parsedPkg['peerDependencies'] != null && - parsedPkg['peerDependencies'][depNpmName] != null) { - version = parsedPkg['peerDependencies'][depNpmName]; - } else if ( - parsedPkg['devDependencies'] != null && - parsedPkg['devDependencies'][depNpmName] != null) { - version = parsedPkg['devDependencies'][depNpmName]; - } - if (version == null) { - throw new Error(`No dependency found for ${dep}.`); - } - - let relaxedVersionPrefix = ''; - if (version.startsWith('~') || version.startsWith('^')) { - relaxedVersionPrefix = version.slice(0, 1); + const dependencyMaps: Array<{[index: string]: string}> = [ + parsedPkg['dependencies'], + parsedPkg['peerDependencies'], + parsedPkg['devDependencies'], + ].filter(v => v != null); + + for (const dependencyMap of dependencyMaps) { + for (const [name, version] of Object.entries(dependencyMap)) { + const prefix = '@tensorflow/'; + if (name.startsWith(prefix) && version.startsWith('link:')) { + const tfjsName = name.slice(prefix.length); + const newVersion = versions.get(tfjsName); + if (newVersion == null) { + throw new Error(`Versions map does not include ${tfjsName}`); + } + + let relaxedVersionPrefix = ''; + if (version.startsWith('~') || version.startsWith('^')) { + relaxedVersionPrefix = version.slice(0, 1); + } + const versionLatest = relaxedVersionPrefix + newVersion; + pkg = `${pkg}`.replace( + new RegExp(`"${name}": "${version}"`, 'g'), + `"${name}": "${versionLatest}"`); + } } - const versionLatest = relaxedVersionPrefix + newVersion; - - pkg = `${pkg}`.replace( - new RegExp(`"${depNpmName}": "${version}"`, 'g'), - `"${depNpmName}": "${versionLatest}"`); } + return pkg; } From 80a809108ca3c3976acaf5e03af709eb39472a5c Mon Sep 17 00:00:00 2001 From: Chunnien Chan <121328115+chunnienc@users.noreply.github.com> Date: Wed, 17 May 2023 17:22:18 -0700 Subject: [PATCH 25/32] [wasm] Fix cos and tan for large float numbers (#7689) * Fix sin/cos workaround * Add tests for large numbers * Fix tan * Exclude new tests in webgl and webgpu * Fix * Exclude tests in tfjs-node * Update * Fix * Fix * Fix * Remove comments --- tfjs-backend-wasm/src/cc/kernels/Cos.cc | 2 +- tfjs-backend-wasm/src/cc/kernels/Sin.cc | 2 +- tfjs-backend-wasm/src/cc/kernels/Tan.cc | 2 +- .../src/cc/sin_cos_workaround.cc | 65 ++++++++++++++----- tfjs-backend-wasm/src/cc/sin_cos_workaround.h | 7 +- tfjs-backend-webgl/src/setup_test.ts | 9 ++- tfjs-backend-webgpu/src/setup_test.ts | 2 + tfjs-core/src/ops/tan_test.ts | 17 +++++ tfjs-node/src/run_tests.ts | 1 + 9 files changed, 81 insertions(+), 26 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/Cos.cc b/tfjs-backend-wasm/src/cc/kernels/Cos.cc index 09418262166..28749083f6e 100644 --- a/tfjs-backend-wasm/src/cc/kernels/Cos.cc +++ b/tfjs-backend-wasm/src/cc/kernels/Cos.cc @@ -31,7 +31,7 @@ extern "C" { EMSCRIPTEN_KEEPALIVE #endif void Cos(const int x_id, const DType dtype, const int out_id) { - unary_f32(x_id, out_id, tfjs::sin_cos_workaround::cos_fixed); + unary_f32(x_id, out_id, tfjs::sin_cos_workaround::CosFixed); } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/kernels/Sin.cc b/tfjs-backend-wasm/src/cc/kernels/Sin.cc index ccf6d2e825c..223a2de9d77 100644 --- a/tfjs-backend-wasm/src/cc/kernels/Sin.cc +++ b/tfjs-backend-wasm/src/cc/kernels/Sin.cc @@ -31,7 +31,7 @@ extern "C" { EMSCRIPTEN_KEEPALIVE #endif 
void Sin(const int x_id, const DType dtype, const int out_id) { - unary_f32(x_id, out_id, tfjs::sin_cos_workaround::sin_fixed); + unary_f32(x_id, out_id, tfjs::sin_cos_workaround::SinFixed); } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/kernels/Tan.cc b/tfjs-backend-wasm/src/cc/kernels/Tan.cc index 7d3ca1f6ada..ae8fc0410c4 100644 --- a/tfjs-backend-wasm/src/cc/kernels/Tan.cc +++ b/tfjs-backend-wasm/src/cc/kernels/Tan.cc @@ -31,7 +31,7 @@ extern "C" { EMSCRIPTEN_KEEPALIVE #endif void Tan(const int x_id, const DType dtype, const int out_id) { - unary_f32(x_id, out_id, tfjs::sin_cos_workaround::tan_fixed); + unary_f32(x_id, out_id, tfjs::sin_cos_workaround::TanFixed); } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/sin_cos_workaround.cc b/tfjs-backend-wasm/src/cc/sin_cos_workaround.cc index 5814b1b0b10..d654bef7f93 100644 --- a/tfjs-backend-wasm/src/cc/sin_cos_workaround.cc +++ b/tfjs-backend-wasm/src/cc/sin_cos_workaround.cc @@ -11,36 +11,65 @@ * See the License for the specific language governing permissions and * limitations under the License. * ===========================================================================*/ -#include + +#include #include "tfjs-backend-wasm/src/cc/sin_cos_workaround.h" namespace tfjs { namespace sin_cos_workaround { -float sin_fixed(float x) { - if (isnan(x)) return nan(""); - auto zero_to_2pi = fmod(fmod(x, 2 * M_PI) + 2 * M_PI, 2 * M_PI); - - if (zero_to_2pi < M_PI_4) { - return sin(zero_to_2pi); - } else if (zero_to_2pi < M_PI_2) { - auto past_pi_4 = zero_to_2pi - M_PI_4; - return cos(M_PI_4 - past_pi_4); - } else if (zero_to_2pi < M_PI) { - auto past_pi_2 = zero_to_2pi - M_PI_2; - return sin_fixed(M_PI_2 - past_pi_2); +namespace { + +template +inline T ShiftRadianToZeroTo2PI(const T& x) { + if (std::isnan(x)) { + return x; + } + return std::fmod(std::fmod(x, 2 * M_PI) + 2 * M_PI, 2 * M_PI); +} + +template +inline T SinZeroTo2PI(const T& x) { + if (std::isnan(x)) { + return x; + } + + if (x < M_PI_4) { + return std::sin(x); + } else if (x < M_PI_2) { + return std::cos(M_PI_2 - x); + } else if (x < M_PI) { + return SinZeroTo2PI(M_PI - x); } else { - return -sin_fixed(2 * M_PI - zero_to_2pi); + return -SinZeroTo2PI(2 * M_PI - x); } } -float cos_fixed(float x) { return sin_fixed(x + M_PI_2); } +template +inline T CosZeroTo2PI(const T& x) { + if (std::isnan(x)) { + return x; + } -float tan_fixed(float x) { - if (isnan(x)) return nan(""); - return sin_fixed(x) / cos_fixed(x); + if (x < M_PI_4) { + return std::cos(x); + } else if (x < M_PI_2) { + return std::sin(M_PI_2 - x); + } else if (x < M_PI) { + return -CosZeroTo2PI(M_PI - x); + } else { + return CosZeroTo2PI(2 * M_PI - x); + } } +} // namespace + +float SinFixed(float x) { return SinZeroTo2PI(ShiftRadianToZeroTo2PI(x)); } + +float CosFixed(float x) { return CosZeroTo2PI(ShiftRadianToZeroTo2PI(x)); } + +float TanFixed(float x) { return std::tan(x); } + } // namespace sin_cos_workaround } // namespace tfjs diff --git a/tfjs-backend-wasm/src/cc/sin_cos_workaround.h b/tfjs-backend-wasm/src/cc/sin_cos_workaround.h index 201bf640ab2..b8bc0cd0ceb 100644 --- a/tfjs-backend-wasm/src/cc/sin_cos_workaround.h +++ b/tfjs-backend-wasm/src/cc/sin_cos_workaround.h @@ -20,12 +20,13 @@ namespace tfjs { namespace sin_cos_workaround { -float sin_fixed(float x); +float SinFixed(float x); -float cos_fixed(float x); +float CosFixed(float x); -float tan_fixed(float x); +float TanFixed(float x); } // namespace sin_cos_workaround } // namespace tfjs + #endif // SIN_COS_WORKAROUND_H_ diff --git 
a/tfjs-backend-webgl/src/setup_test.ts b/tfjs-backend-webgl/src/setup_test.ts index 0baca737d62..20fd1cfe11c 100644 --- a/tfjs-backend-webgl/src/setup_test.ts +++ b/tfjs-backend-webgl/src/setup_test.ts @@ -31,12 +31,17 @@ import {registerTestEnvs} from './backend_webgl_test_registry'; registerTestEnvs(); const TEST_FILTERS: TestFilter[] = []; + const customInclude = (testName: string) => { const toExclude = [ - 'isBrowser: false', 'dilation gradient', + 'isBrowser: false', + 'dilation gradient', 'throws when index is out of bound', // otsu tests for threshold op is failing on windows - 'method otsu', 'Draw on 2d context' + 'method otsu', + 'Draw on 2d context', + // https://github.com/tensorflow/tfjs/issues/7618 + 'numbers exceed float32 precision', ]; for (const subStr of toExclude) { if (testName.includes(subStr)) { diff --git a/tfjs-backend-webgpu/src/setup_test.ts b/tfjs-backend-webgpu/src/setup_test.ts index 9095d6f30ce..9bfba308483 100644 --- a/tfjs-backend-webgpu/src/setup_test.ts +++ b/tfjs-backend-webgpu/src/setup_test.ts @@ -69,6 +69,8 @@ const TEST_FILTERS: TestFilter[] = [ excludes: [ 'gradients', // Failing on MacOS //'gradient with clones', // Failing on MacOS + // https://github.com/tensorflow/tfjs/issues/7618 + 'numbers exceed float32 precision', ], }, { diff --git a/tfjs-core/src/ops/tan_test.ts b/tfjs-core/src/ops/tan_test.ts index f945d2cd294..33126d17023 100644 --- a/tfjs-core/src/ops/tan_test.ts +++ b/tfjs-core/src/ops/tan_test.ts @@ -32,6 +32,23 @@ describeWithFlags('tan', ALL_ENVS, () => { expectArraysClose(await result.data(), expected); }); + it('numbers exceed float32 precision', async () => { + const values = [ + -608065414.8781943, + 781902002.7943993, + -470910673.97399473, + 1786759246.171617, + 1873777868.5510726, + -1015107953.8969269, + 830023227.6215034, + ]; + const a = tf.tensor1d(values, 'float32'); + const result = tf.tan(a); + + const expected = [...new Float32Array(values).map((v) => Math.tan(v))]; + expectArraysClose(await result.data(), expected); + }); + it('propagates NaNs', async () => { const a = tf.tensor1d([4, NaN, 0]); const res = tf.tan(a); diff --git a/tfjs-node/src/run_tests.ts b/tfjs-node/src/run_tests.ts index c34ff0bf342..f0e83940d2e 100644 --- a/tfjs-node/src/run_tests.ts +++ b/tfjs-node/src/run_tests.ts @@ -174,6 +174,7 @@ const IGNORE_LIST: string[] = [ 'upperBound', 'lowerBound', 'multinomial test-tensorflow {} creates the same data given the same seed', + 'tan test-tensorflow {} numbers exceed float32 precision', ]; if (process.platform === 'win32') { From c66f302ea5e8b16da55283688e47e303e4590d60 Mon Sep 17 00:00:00 2001 From: Chunnien Chan <121328115+chunnienc@users.noreply.github.com> Date: Sat, 20 May 2023 14:30:27 -0700 Subject: [PATCH 26/32] [wasm] Update xnnpack (#7507) * wip * Add xnn_caches * Upgrade xnnpack * exp * Update xnnpack deps * Fix xnn cache * TEST * Cleanup * Cleanup * Cleanup * Update xnnpack * Add flag to avoid unused function * Add comment * Add config to turn xnnpack logs off * Add sha256 for emsdk * Update xnnpack and toolchain, and disable xnn caches * Fix lint * Remove unused include --- .bazelrc | 3 + WORKSPACE | 66 ++++++------------- tfjs-backend-wasm/package.json | 4 +- tfjs-backend-wasm/src/cc/backend.h | 1 + .../src/cc/batch_mat_mul_impl.cc | 3 +- tfjs-backend-wasm/src/cc/conv2d_impl.cc | 2 +- tfjs-backend-wasm/src/cc/prelu_impl.cc | 5 +- 7 files changed, 32 insertions(+), 52 deletions(-) diff --git a/.bazelrc b/.bazelrc index 8da25af637e..365c70739a7 100644 --- a/.bazelrc +++ b/.bazelrc @@ -12,6 
+12,9 @@ build --cxxopt="-std=c++17" build --cxxopt="-fno-rtti" build --cxxopt="-fno-exceptions" build --cxxopt="-fomit-frame-pointer" +# Avoid unused function error `xnn_vlog` when building XNNPACK with `-c opt`. +build:xnnpack-opt --copt="-DXNN_LOG_TO_STDIO=0" +build:xnnpack-opt --define=xnn_log_level=none # The following --define=EXECUTOR=remote will be able to be removed # once https://github.com/bazelbuild/bazel/issues/7254 is fixed diff --git a/WORKSPACE b/WORKSPACE index 02d1d98a879..17e292cd0a6 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -102,9 +102,9 @@ http_archive( name = "emsdk", # TODO: Remove repo_mapping when emsdk updates to rules_nodejs 5 repo_mapping = {"@nodejs": "@nodejs_host"}, - sha256 = "b8270749b99d8d14922d1831b93781a5560fba6f7bce65cd477fc1b6aa262535", - strip_prefix = "emsdk-3.1.28/bazel", - urls = ["https://github.com/emscripten-core/emsdk/archive/refs/tags/3.1.28.tar.gz"], + sha256 = "bbea764c57af830e761f1fb8600d42dc303aa63ffd43647694eda5b8b757b469", + strip_prefix = "emsdk-3.1.35/bazel", + urls = ["https://github.com/emscripten-core/emsdk/archive/refs/tags/3.1.35.tar.gz"], ) load("@emsdk//:deps.bzl", emsdk_deps = "deps") @@ -124,9 +124,9 @@ load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") # xnnpack used for fast vectorized wasm operations git_repository( name = "xnnpack", - commit = "5e8033a72a8d0f1c2b1f06e29137cc697c6b661d", + commit = "fa94f297e58c1e139ca64f78786df3744f557819", remote = "https://github.com/google/XNNPACK.git", - shallow_since = "1643627844 -0800", + shallow_since = "1683912990 -0700", ) # The libraries below are transitive dependencies of XNNPACK that we need to @@ -136,10 +136,10 @@ git_repository( http_archive( name = "FP16", build_file = "@xnnpack//third_party:FP16.BUILD", - sha256 = "0d56bb92f649ec294dbccb13e04865e3c82933b6f6735d1d7145de45da700156", - strip_prefix = "FP16-3c54eacb74f6f5e39077300c5564156c424d77ba", + sha256 = "e66e65515fa09927b348d3d584c68be4215cfe664100d01c9dbc7655a5716d70", + strip_prefix = "FP16-0a92994d729ff76a58f692d3028ca1b64b145d91", urls = [ - "https://github.com/Maratyszcza/FP16/archive/3c54eacb74f6f5e39077300c5564156c424d77ba.zip", + "https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip", ], ) @@ -156,61 +156,35 @@ http_archive( # pthreadpool library, used for parallelization http_archive( name = "pthreadpool", - sha256 = "8461f6540ae9f777ce20d1c0d1d249e5e61c438744fb390c0c6f91940aa69ea3", - strip_prefix = "pthreadpool-545ebe9f225aec6dca49109516fac02e973a3de2", + sha256 = "e6370550a1abf1503daf3c2c196e0a1c2b253440c39e1a57740ff49af2d8bedf", + strip_prefix = "pthreadpool-43edadc654d6283b4b6e45ba09a853181ae8e850", urls = [ - "https://github.com/Maratyszcza/pthreadpool/archive/545ebe9f225aec6dca49109516fac02e973a3de2.zip", - ], -) - -# clog library, used for logging -http_archive( - name = "clog", - build_file = "@xnnpack//third_party:clog.BUILD", - sha256 = "3f2dc1970f397a0e59db72f9fca6ff144b216895c1d606f6c94a507c1e53a025", - strip_prefix = "cpuinfo-d5e37adf1406cf899d7d9ec1d317c47506ccb970", - urls = [ - "https://github.com/pytorch/cpuinfo/archive/d5e37adf1406cf899d7d9ec1d317c47506ccb970.tar.gz", + "https://github.com/Maratyszcza/pthreadpool/archive/43edadc654d6283b4b6e45ba09a853181ae8e850.zip", ], ) # cpuinfo library, used for detecting processor characteristics http_archive( name = "cpuinfo", - build_file = "@xnnpack//third_party:cpuinfo.BUILD", - patches = ["@xnnpack//third_party:cpuinfo.patch"], - sha256 = 
"a7f9a188148a1660149878f737f42783e72f33a4f842f3e362fee2c981613e53", - strip_prefix = "cpuinfo-ed8b86a253800bafdb7b25c5c399f91bff9cb1f3", + sha256 = "ba668f9f8ea5b4890309b7db1ed2e152aaaf98af6f9a8a63dbe1b75c04e52cb9", + strip_prefix = "cpuinfo-3dc310302210c1891ffcfb12ae67b11a3ad3a150", urls = [ - "https://github.com/pytorch/cpuinfo/archive/ed8b86a253800bafdb7b25c5c399f91bff9cb1f3.zip", + "https://github.com/pytorch/cpuinfo/archive/3dc310302210c1891ffcfb12ae67b11a3ad3a150.zip", ], ) -# psimd library, used for fallback 128-bit SIMD micro-kernels +# Google Test framework, used by most unit-tests. http_archive( - name = "psimd", - build_file = "@xnnpack//third_party:psimd.BUILD", - sha256 = "dc615342bcbe51ca885323e51b68b90ed9bb9fa7df0f4419dbfa0297d5e837b7", - strip_prefix = "psimd-072586a71b55b7f8c584153d223e95687148a900", - urls = [ - "https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip", - ], -) - -git_repository( name = "com_google_googletest", - commit = "cd17fa2abda2a2e4111cdabd62a87aea16835014", - remote = "https://github.com/google/googletest.git", - shallow_since = "1570558426 -0400", + sha256 = "5cb522f1427558c6df572d6d0e1bf0fd076428633d080e88ad5312be0b6a8859", + strip_prefix = "googletest-e23cdb78e9fef1f69a9ef917f447add5638daf2a", + urls = ["https://github.com/google/googletest/archive/e23cdb78e9fef1f69a9ef917f447add5638daf2a.zip"], ) http_archive( name = "rules_cc", - sha256 = "90d5a66950b492cbf86201cdc49c4b59796a85a4eb9fd63c07afe5f7132ea623", - strip_prefix = "rules_cc-8346df34b6593b051403b8e429db15c7f4ead937", - urls = [ - "https://github.com/bazelbuild/rules_cc/archive/8346df34b6593b051403b8e429db15c7f4ead937.zip", - ], + strip_prefix = "rules_cc-main", + urls = ["https://github.com/bazelbuild/rules_cc/archive/main.zip"], ) http_archive( diff --git a/tfjs-backend-wasm/package.json b/tfjs-backend-wasm/package.json index 20e53047536..b6bf9934158 100644 --- a/tfjs-backend-wasm/package.json +++ b/tfjs-backend-wasm/package.json @@ -14,9 +14,9 @@ "jsdelivr": "dist/tf-backend-wasm.min.js", "miniprogram": "dist/miniprogram", "scripts": { - "build": "bazel build -c opt :tfjs-backend-wasm_pkg", + "build": "bazel build -c opt --config=xnnpack-opt :tfjs-backend-wasm_pkg", "build-dbg": "bazel build -c dbg :tfjs-backend-wasm_pkg", - "publish-npm": "bazel run -c opt :tfjs-backend-wasm_pkg.publish", + "publish-npm": "bazel run -c opt --config=xnnpack-opt :tfjs-backend-wasm_pkg.publish", "clean": "bazel clean --expunge", "cpplint": "./scripts/cpplint.js", "test": "bazel test -c dbg :tests --test_output=all", diff --git a/tfjs-backend-wasm/src/cc/backend.h b/tfjs-backend-wasm/src/cc/backend.h index 89bfc8161da..70a34c23683 100644 --- a/tfjs-backend-wasm/src/cc/backend.h +++ b/tfjs-backend-wasm/src/cc/backend.h @@ -92,6 +92,7 @@ const size_t num_tensors(); extern size_t xnn_operator_count; extern pthreadpool *threadpool; + } // namespace backend namespace wasm { diff --git a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc index 13f2aa92a02..25317990a10 100644 --- a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc @@ -128,7 +128,8 @@ void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, const uint32_t flags = XNN_FLAG_TRANSPOSE_WEIGHTS; xnn_status status = xnn_create_fully_connected_nc_f32( input_channels, output_channels, input_stride, output_stride, b_buf, - bias_buf, output_min, output_max, flags, &fully_connected_op); + bias_buf, output_min, output_max, 
flags, + /*code_cache=*/nullptr, /*weights_cache=*/nullptr, &fully_connected_op); if (status != xnn_status_success) { tfjs::util::warn( "XNN status for xnn_create_fully_connected_nc_f32 is not successful. " diff --git a/tfjs-backend-wasm/src/cc/conv2d_impl.cc b/tfjs-backend-wasm/src/cc/conv2d_impl.cc index aae4091a4b4..23d1e7871e0 100644 --- a/tfjs-backend-wasm/src/cc/conv2d_impl.cc +++ b/tfjs-backend-wasm/src/cc/conv2d_impl.cc @@ -236,7 +236,7 @@ void conv2d(const size_t x_id, const size_t batch_size, stride_height, stride_width, dilation_height, dilation_width, groups, group_input_channels, group_output_channels, input_pixel_stride, output_pixel_stride, filter_xnn, bias_buf, output_min, output_max, - flags, &conv2d_op); + flags, /*code_cache=*/nullptr, /*weights_cache=*/nullptr, &conv2d_op); if (status != xnn_status_success) { util::warn( "XNN status for xnn_create_convolution2d_nhwc_f32 is not successful. " diff --git a/tfjs-backend-wasm/src/cc/prelu_impl.cc b/tfjs-backend-wasm/src/cc/prelu_impl.cc index e795eef405d..e28a221227f 100644 --- a/tfjs-backend-wasm/src/cc/prelu_impl.cc +++ b/tfjs-backend-wasm/src/cc/prelu_impl.cc @@ -60,8 +60,9 @@ void prelu(const float* x_buf, const size_t x_size, const size_t weights_id, const size_t strides = channels; const uint32_t flags = 0; - xnn_status status = xnn_create_prelu_nc_f32(channels, strides, strides, - weights_buf, flags, &prelu_op); + xnn_status status = xnn_create_prelu_nc_f32( + channels, strides, strides, weights_buf, flags, + /*code_cache=*/nullptr, /*weights_cache=*/nullptr, &prelu_op); if (status != xnn_status_success) { util::warn( "XNN status for xnn_create_prelu_nc_f32 is not successful. Got " From f7fa5af8f7a6245ff9e0ab32714bb9e33448403f Mon Sep 17 00:00:00 2001 From: Jiajia Qin Date: Tue, 23 May 2023 08:14:29 +0800 Subject: [PATCH 27/32] Recover the default backend (#7709) --- tfjs-core/src/ops/tensor.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index a495e7523b5..9b664e07a07 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -55,6 +55,7 @@ import {makeTensor} from './tensor_ops_util'; * const customBackend = new tf.MathBackendWebGL(customCanvas); * tf.registerBackend('custom-webgl', () => customBackend); * } + * const savedBackend = tf.getBackend(); * await tf.setBackend('custom-webgl'); * const gl = tf.backend().gpgpu.gl; * const texture = gl.createTexture(); @@ -94,6 +95,7 @@ import {makeTensor} from './tensor_ops_util'; * // so: * * const tex = a.dataToGPU(); + * await tf.setBackend(savedBackend); * ``` * * ```js @@ -146,6 +148,7 @@ import {makeTensor} from './tensor_ops_util'; * return gpuReadBuffer; * } * + * const savedBackend = tf.getBackend(); * await tf.setBackend('webgpu').catch( * () => {throw new Error( * 'Failed to use WebGPU backend. Please use Chrome Canary to run.')}); @@ -166,6 +169,7 @@ import {makeTensor} from './tensor_ops_util'; * b.dispose(); * result.dispose(); * aBuffer.destroy(); + * await tf.setBackend(savedBackend); * ``` * @param values The values of the tensor. Can be nested array of numbers, * or a flat array, or a `TypedArray`, or a `WebGLData` object, or a From b122429cd8a603542956a402a93e75d0fb076933 Mon Sep 17 00:00:00 2001 From: Matthew Soulanille Date: Wed, 24 May 2023 11:59:22 -0700 Subject: [PATCH 28/32] Do not throw an error when killing the verdaccio process (#7695) Killing the verdaccio process throws an error because the disconnect event emits when the process is killed. 
We throw an error on a disconnect to catch any unexpected verdaccio disconnections. Fix this by deregistering the disconnect handler before killing the verdaccio process. --- scripts/publish-npm.ts | 4 ++-- scripts/release-util.ts | 24 ++++++++++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/scripts/publish-npm.ts b/scripts/publish-npm.ts index 7cc6e7f7a94..870a43e8643 100755 --- a/scripts/publish-npm.ts +++ b/scripts/publish-npm.ts @@ -359,7 +359,7 @@ async function main() { // Build and publish all packages to a local Verdaccio repo for staging. console.log( chalk.magenta.bold('~~~ Staging packages locally in Verdaccio ~~~')); - const verdaccio = await runVerdaccio(); + const killVerdaccio = await runVerdaccio(); try { for (const pkg of packages) { await publish(pkg, VERDACCIO_REGISTRY); @@ -367,7 +367,7 @@ async function main() { } finally { // Make sure to kill the verdaccio server before exiting even if publish // throws an error. Otherwise, it blocks the port for the next run. - verdaccio.kill(); + killVerdaccio(); } if (args.dry) { diff --git a/scripts/release-util.ts b/scripts/release-util.ts index 57914363e1e..779f1d8051d 100755 --- a/scripts/release-util.ts +++ b/scripts/release-util.ts @@ -25,7 +25,7 @@ import * as readline from 'readline'; import * as shell from 'shelljs'; import rimraf from 'rimraf'; import * as path from 'path'; -import {ChildProcess, fork} from 'child_process'; +import {fork} from 'child_process'; export interface Phase { // The list of packages that will be updated with this change. @@ -615,7 +615,7 @@ export function memoize(f: (arg: I) => Promise): (arg: I) => Promise } } -export async function runVerdaccio(): Promise { +export async function runVerdaccio(): Promise<() => void> { // Remove the verdaccio package store. // TODO(mattsoulanille): Move the verdaccio storage and config file here // once the nightly verdaccio tests are handled by this script. @@ -625,7 +625,8 @@ export async function runVerdaccio(): Promise { // messaging works and verdaccio can tell node that it has started. // https://verdaccio.org/docs/verdaccio-programmatically/#using-fork-from-child_process-module const verdaccioBin = require.resolve('verdaccio/bin/verdaccio'); - const serverProcess = fork(verdaccioBin, ['--config=e2e/scripts/verdaccio.yaml']); + const config = path.join(__dirname, '../e2e/scripts/verdaccio.yaml'); + const serverProcess = fork(verdaccioBin, [`--config=${config}`]); const ready = new Promise((resolve, reject) => { const timeLimitMilliseconds = 30_000; console.log(`Waiting ${timeLimitMilliseconds / 1000} seconds for ` + @@ -647,15 +648,22 @@ export async function runVerdaccio(): Promise { serverProcess.on('error', (err: unknown) => { throw new Error(`Verdaccio error: ${err}`); }); - serverProcess.on('disconnect', (err: unknown) => { - throw new Error(`Verdaccio disconnected: ${err}`); - }); + + const onUnexpectedDisconnect = (err: unknown) => { + throw new Error(`Verdaccio process unexpectedly disconnected: ${err}`); + }; + serverProcess.on('disconnect', onUnexpectedDisconnect); + + const killVerdaccio = () => { + serverProcess.off('disconnect', onUnexpectedDisconnect); + serverProcess.kill(); + }; // Kill verdaccio when node exits. 
- process.on('exit', () => {serverProcess.kill();}); + process.on('exit', killVerdaccio); await ready; - return serverProcess; + return killVerdaccio; } /** From 94b9dc45a9876838dbf8dc458c5ca762cd451313 Mon Sep 17 00:00:00 2001 From: Jiajia Qin Date: Thu, 25 May 2023 12:37:54 +0800 Subject: [PATCH 29/32] webgpu: Optimize SpaceToBatchND (#7703) * webgpu: Optimize SpaceToBatchND Fuse pad and transpose to one shader. See 20% improvement for SpaceToBatchND in DeepLabV3 --- .../src/kernels/SpaceToBatchND.ts | 47 +++++------ tfjs-backend-webgpu/src/pad_webgpu.ts | 63 ++++++++------- .../src/space_to_batchND_webgpu.ts | 79 +++++++++++++++++++ tfjs-backend-webgpu/src/transpose_webgpu.ts | 6 +- tfjs-backend-webgpu/src/webgpu_program.ts | 2 +- 5 files changed, 135 insertions(+), 62 deletions(-) create mode 100644 tfjs-backend-webgpu/src/space_to_batchND_webgpu.ts diff --git a/tfjs-backend-webgpu/src/kernels/SpaceToBatchND.ts b/tfjs-backend-webgpu/src/kernels/SpaceToBatchND.ts index f0b3663e456..c5d695afcbf 100644 --- a/tfjs-backend-webgpu/src/kernels/SpaceToBatchND.ts +++ b/tfjs-backend-webgpu/src/kernels/SpaceToBatchND.ts @@ -18,10 +18,9 @@ import {backend_util, KernelConfig, KernelFunc, SpaceToBatchND, SpaceToBatchNDAttrs, SpaceToBatchNDInputs, TensorInfo, util} from '@tensorflow/tfjs-core'; import {WebGPUBackend} from '../backend_webgpu'; +import {SpaceToBatchNDProgram} from '../space_to_batchND_webgpu'; -import {padV2} from './PadV2'; import {reshape} from './Reshape'; -import {transpose} from './Transpose'; export const spaceToBatchND = (args: { inputs: SpaceToBatchNDInputs, @@ -45,41 +44,31 @@ export const spaceToBatchND = (args: { completePaddings.push([0, 0]); } - const toDispose = []; - - const paddedX = padV2({ - inputs: {x}, - backend, - attrs: {paddings: completePaddings, constantValue: 0} - }); - + const paddedXShape = completePaddings.map( + (p, i) => p[0] /* beforePad */ + x.shape[i] + p[1] /* afterPad */); const reshapedPaddedShape = - backend_util.getReshaped(paddedX.shape, blockShape, prod, false); + backend_util.getReshaped(paddedXShape, blockShape, prod, false); const permutedReshapedPaddedPermutation = backend_util.getPermuted( reshapedPaddedShape.length, blockShape.length, false); const flattenShape = - backend_util.getReshapedPermuted(paddedX.shape, blockShape, prod, false); - - const reshapedPaddedX = reshape( - {inputs: {x: paddedX}, backend, attrs: {shape: reshapedPaddedShape}}); - - const paddedXT = transpose({ - inputs: {x: reshapedPaddedX}, - backend, - attrs: {perm: permutedReshapedPaddedPermutation} - }); - + backend_util.getReshapedPermuted(paddedXShape, blockShape, prod, false); + + const paddedXShapeStrides = util.computeStrides(paddedXShape); + const program = new SpaceToBatchNDProgram( + x.shape, paddedXShape, completePaddings, reshapedPaddedShape, + permutedReshapedPaddedPermutation, paddedXShapeStrides.length); + const uniformData = [ + {type: 'int32', data: reshapedPaddedShape}, + {type: 'int32', data: paddedXShapeStrides} + ]; + completePaddings.map( + p => uniformData.push({type: 'int32', data: [p[0], p[1]]})); + const paddedXT = backend.runWebGPUProgram(program, [x], x.dtype, uniformData); const result = reshape({inputs: {x: paddedXT}, backend, attrs: {shape: flattenShape}}); - - toDispose.push(paddedX); - toDispose.push(reshapedPaddedX); - toDispose.push(paddedXT); - - toDispose.forEach(t => backend.disposeData(t.dataId)); - + backend.disposeData(paddedXT.dataId); return result; }; diff --git a/tfjs-backend-webgpu/src/pad_webgpu.ts 
b/tfjs-backend-webgpu/src/pad_webgpu.ts index f1b62dac20f..f999a453861 100644 --- a/tfjs-backend-webgpu/src/pad_webgpu.ts +++ b/tfjs-backend-webgpu/src/pad_webgpu.ts @@ -18,6 +18,38 @@ import {getCoordsDataType, getMainHeaderString as main, WebGPUProgram} from './webgpu_program'; import {computeDispatch, flatDispatchLayout} from './webgpu_util'; +export function padCommon(shape: number[], fillZero = false): string { + const rank = shape.length; + const type = getCoordsDataType(rank); + const start = shape.map((_, i) => `uniforms.pad${i}[0]`).join(','); + const end = shape + .map( + (_, i) => `uniforms.pad${i}[0] + uniforms.xShape${ + rank > 1 ? `[${i}]` : ''}`) + .join(','); + const startValue = rank > 1 ? `${type}(${start})` : `${start}`; + const endValue = rank > 1 ? `${type}(${end})` : `${end}`; + + const leftPadCondition = + rank > 1 ? `any(paddedCoords < start)` : `paddedCoords < start`; + const rightPadCondition = + rank > 1 ? `any(paddedCoords >= end)` : `paddedCoords >= end`; + + const unpackedCoords = rank > 1 ? + ['coords[0]', 'coords[1]', 'coords[2]', 'coords[3]'].slice(0, rank) : + 'coords'; + return ` + let start = ${startValue}; + let end = ${endValue}; + if (${leftPadCondition} || ${rightPadCondition}) { + setOutputAtIndex(index, ${fillZero ? 0.0 : 'uniforms.constantValue'}); + } else { + let coords = paddedCoords - start; + setOutputAtIndex(index, getX(${unpackedCoords})); + } + `; +} + export class PadProgram implements WebGPUProgram { outputShape: number[]; shaderKey: string; @@ -43,38 +75,11 @@ export class PadProgram implements WebGPUProgram { } getUserCode(): string { - const rank = this.xShape.length; - const type = getCoordsDataType(rank); - // The length of paddings are same with the rank of the input tensor. - const start = this.xShape.map((_, i) => `uniforms.pad${i}[0]`).join(','); - const end = this.xShape - .map( - (_, i) => `uniforms.pad${i}[0] + uniforms.xShape${ - rank > 1 ? `[${i}]` : ''}`) - .join(','); - const startValue = rank > 1 ? `${type}(${start})` : `${start}`; - const endValue = rank > 1 ? `${type}(${end})` : `${end}`; - - const leftPadCondition = rank > 1 ? `any(outC < start)` : `outC < start`; - const rightPadCondition = rank > 1 ? `any(outC >= end)` : `outC >= end`; - - const unpackedCoords = rank > 1 ? - ['coords[0]', 'coords[1]', 'coords[2]', 'coords[3]'].slice(0, rank) : - 'coords'; - const userCode = ` ${main('index')} { if (index < uniforms.size) { - let start = ${startValue}; - let end = ${endValue}; - let outC = getCoordsFromIndex(index); - - if (${leftPadCondition} || ${rightPadCondition}) { - setOutputAtIndex(index, uniforms.constantValue); - } else { - let coords = outC - start; - setOutputAtIndex(index, getX(${unpackedCoords})); - } + let paddedCoords = getCoordsFromIndex(index); + ${padCommon(this.xShape)} } } `; diff --git a/tfjs-backend-webgpu/src/space_to_batchND_webgpu.ts b/tfjs-backend-webgpu/src/space_to_batchND_webgpu.ts new file mode 100644 index 00000000000..72b5f24b22b --- /dev/null +++ b/tfjs-backend-webgpu/src/space_to_batchND_webgpu.ts @@ -0,0 +1,79 @@ +/** + * @license + * Copyright 2023 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {padCommon} from './pad_webgpu'; +import {getSwitchedCoords} from './transpose_webgpu'; +import {getCoordsDataType, getCoordsFromIndexSnippet, getMainHeaderString as main, WebGPUProgram} from './webgpu_program'; +import {computeDispatch, flatDispatchLayout} from './webgpu_util'; + +export class SpaceToBatchNDProgram implements WebGPUProgram { + variableNames = ['x']; + outputShape: number[] = []; + shaderKey: string; + dispatchLayout: {x: number[]}; + dispatch: [number, number, number]; + uniforms = ''; + workgroupSize: [number, number, number] = [64, 1, 1]; + newDim: number[]; + xShape: number[]; + paddedXShape: number[]; + size = true; + + constructor( + xShape: number[], paddedXShape: number[], + paddings: Array<[number, number]>, reshapedPaddedXShape: number[], + newDim: number[], paddedXShapeStridesShapeLength: number) { + const outputShape: number[] = new Array(reshapedPaddedXShape.length); + for (let i = 0; i < outputShape.length; i++) { + outputShape[i] = reshapedPaddedXShape[newDim[i]]; + } + this.outputShape = outputShape; + this.newDim = newDim; + this.dispatchLayout = flatDispatchLayout(this.outputShape); + this.dispatch = computeDispatch( + this.dispatchLayout, this.outputShape, this.workgroupSize); + this.xShape = xShape; + this.paddedXShape = paddedXShape; + this.uniforms += `reshapedPaddedXShape : ${ + getCoordsDataType( + reshapedPaddedXShape.length)}, paddedXShapeStrides : ${ + getCoordsDataType(paddedXShapeStridesShapeLength)}, `; + paddings.map((_, i) => { + this.uniforms += ` pad${i} : vec2,`; + }); + this.shaderKey = `spaceToBatchND_${newDim}`; + } + + getUserCode(): string { + const dtype = getCoordsDataType(this.outputShape.length); + const switched = getSwitchedCoords(this.newDim); + + const userCode = ` + ${getCoordsFromIndexSnippet(this.paddedXShape, 'PaddedX')} + ${main('index')} { + if(index < uniforms.size) { + let coords = getCoordsFromIndex(index); + let switchedIndex = getIndexFromCoords${this.outputShape.length}D(${ + dtype}(${switched}), uniforms.reshapedPaddedXShape); + let paddedCoords = getPaddedXCoordsFromIndex(switchedIndex); + ${padCommon(this.xShape, true)} + } + } + `; + return userCode; + } +} diff --git a/tfjs-backend-webgpu/src/transpose_webgpu.ts b/tfjs-backend-webgpu/src/transpose_webgpu.ts index 472df47fe14..8c86b4f30a7 100644 --- a/tfjs-backend-webgpu/src/transpose_webgpu.ts +++ b/tfjs-backend-webgpu/src/transpose_webgpu.ts @@ -53,7 +53,7 @@ export class TransposeProgram implements WebGPUProgram { for(var i = 0; i < ${this.workPerThread}; i = i + 1) { let flatIndex = index * ${this.workPerThread} + i; if(flatIndex < uniforms.size) { - let resRC = getCoordsFromIndex(flatIndex); + let coords = getCoordsFromIndex(flatIndex); setOutputAtIndex(flatIndex, A[getIndexFromCoords${ this.outputShape.length}D( ${dtype}(${switched}), uniforms.aShape)]); @@ -65,14 +65,14 @@ export class TransposeProgram implements WebGPUProgram { } } -function getSwitchedCoords(newDim: number[]): string { +export function getSwitchedCoords(newDim: number[]): 
string { const rank = newDim.length; if (rank > 6) { throw Error(`Transpose for rank ${rank} is not yet supported`); } const switchedCoords = new Array(rank); for (let i = 0; i < newDim.length; i++) { - switchedCoords[newDim[i]] = `resRC.${getCoordsXYZ(i)}`; + switchedCoords[newDim[i]] = `coords.${getCoordsXYZ(i)}`; } return switchedCoords.join(); diff --git a/tfjs-backend-webgpu/src/webgpu_program.ts b/tfjs-backend-webgpu/src/webgpu_program.ts index 258c7b07e59..4743cd5320f 100644 --- a/tfjs-backend-webgpu/src/webgpu_program.ts +++ b/tfjs-backend-webgpu/src/webgpu_program.ts @@ -444,7 +444,7 @@ type InputInfo = { * with each stride and decrements the index until the index equals the final * dimension coordinate. */ -function getCoordsFromIndexSnippet(shape: number[], name = ''): string { +export function getCoordsFromIndexSnippet(shape: number[], name = ''): string { const rank = shape.length; const funcName = name !== '' ? `get${name.charAt(0).toUpperCase() + name.slice(1)}CoordsFromIndex` : From 80d452672d26613cae6a46e9fce8ab4b7abca095 Mon Sep 17 00:00:00 2001 From: Yang Gu Date: Thu, 25 May 2023 14:02:54 +0800 Subject: [PATCH 30/32] webgpu: Replace timestamp-query-in-passes with timestamp-query (#7714) * webgpu: Replace timestamp-query-in-passes with timestamp-query Timestamp-query has a broader support than timestamp-query-in-passes on all platforms, including macOS. Note that Chrome switch '--disable-dawn-features=disallow_unsafe_apis' is still needed now as the timestamp has the accuracy of nanosecond, which is too accurate to be safe. Later changes in Chrome may lift this limitation. --- tfjs-backend-webgpu/src/backend_webgpu.ts | 205 ++++++++++++---------- tfjs-backend-webgpu/src/base.ts | 10 +- tfjs-core/yarn.lock | 8 +- yarn.lock | 4 +- 4 files changed, 116 insertions(+), 111 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index 4307813e1af..f4f89130d37 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -49,7 +49,8 @@ type TensorData = { interface DataId {} export type WebGPUKernelInfo = { - name: string; query: Promise; + name: string, + query: Promise, }; export type TimerNode = RecursiveArray|WebGPUKernelInfo; @@ -105,10 +106,10 @@ export class WebGPUBackend extends KernelBackend { thresholdToIncreaseWorkgroups: number; private activeTimers: TimerNode[]; - private currentCommandEncoder: GPUCommandEncoder; - private currentComputePass: GPUComputePassEncoder; + private commandEncoder: GPUCommandEncoder; + private computePassEncoder: GPUComputePassEncoder; private commandQueueOwnedIds = new WeakSet(); - private dispatchNumberInEncoder = 0; + private dispatchCountInPass = 0; private disposed = false; private downloadWaitMs = 0; private dummyCanvas: HTMLCanvasElement; @@ -118,12 +119,15 @@ export class WebGPUBackend extends KernelBackend { private pipelineCache: {[key: string]: GPUComputePipeline|Promise}; private programTimersStack: TimerNode[]; - private querySet: GPUQuerySet; + private queryResolveBuffer: GPUBuffer = null; + private querySet: GPUQuerySet = null; + private querySetCount = 2; private stagingPendingDisposal: GPUBuffer[] = []; - private supportTimeQuery: boolean; + private supportTimestampQuery: boolean; private uniformPendingDisposal: GPUBuffer[] = []; private uploadWaitMs = 0; private hasReadSyncWarned = false; + private hasTimestampQueryWarned = false; private nextDataId(): number { return WebGPUBackend.nextDataId++; @@ -137,23 +141,16 @@ export 
class WebGPUBackend extends KernelBackend { this.pipelineCache = {}; this.device = device; this.queue = device.queue; - this.currentCommandEncoder = null; - this.currentComputePass = null; - this.supportTimeQuery = - device.features.has('timestamp-query-inside-passes'); + this.commandEncoder = null; + this.computePassEncoder = null; this.adapterInfo = new AdapterInfo(adapterInfo); + this.supportTimestampQuery = this.device.features.has('timestamp-query'); this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8; this.bufferManager = new BufferManager(this.device); this.textureManager = new TextureManager(this.device); this.tensorMap = new DataStorage(this, engine()); - if (this.supportTimeQuery) { - this.querySet = this.device.createQuerySet({ - type: 'timestamp', - count: 2, - }); - } // Profiling tools like PIX needs this dummy canvas to // trigger capturing a frame. @@ -290,10 +287,9 @@ export class WebGPUBackend extends KernelBackend { } submitQueue() { - this.ensureComputePassEnded(); - this.queue.submit([this.currentCommandEncoder.finish()]); - this.currentCommandEncoder = null; - this.dispatchNumberInEncoder = 0; + this.queue.submit([this.commandEncoder.finish()]); + this.commandEncoder = null; + this.dispatchCountInPass = 0; this.commandQueueOwnedIds = new WeakSet(); @@ -313,23 +309,16 @@ export class WebGPUBackend extends KernelBackend { } ensureCommandEncoderReady() { - if (!this.currentCommandEncoder) { - this.currentCommandEncoder = this.device.createCommandEncoder(); - } - } - - ensureComputePassEnded() { - if (this.currentComputePass) { - this.currentComputePass.end(); - this.currentComputePass = null; + if (!this.commandEncoder) { + this.commandEncoder = this.device.createCommandEncoder(); } } - getComputePass() { - if (!this.currentComputePass) { - this.currentComputePass = this.currentCommandEncoder.beginComputePass(); + endComputePassEncoder() { + if (this.computePassEncoder) { + this.computePassEncoder.end(); + this.computePassEncoder = null; } - return this.currentComputePass; } // Check if parallel compilation is done. 
@@ -356,9 +345,8 @@ export class WebGPUBackend extends KernelBackend { const stagingBuffer = this.bufferManager.acquireBuffer( size, GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ); this.ensureCommandEncoderReady(); - this.ensureComputePassEnded(); - this.currentCommandEncoder.copyBufferToBuffer( - buffer, 0, stagingBuffer, 0, size); + this.endComputePassEncoder(); + this.commandEncoder.copyBufferToBuffer(buffer, 0, stagingBuffer, 0, size); this.submitQueue(); await stagingBuffer.mapAsync(GPUMapMode.READ); @@ -431,7 +419,7 @@ export class WebGPUBackend extends KernelBackend { alphaModes.map(_ => new OffscreenCanvas(canvasWidth, canvasHeight)); const stagingHostStorage = new OffscreenCanvas(canvasWidth, canvasHeight); - this.ensureComputePassEnded(); + this.endComputePassEncoder(); stagingDeviceStorage .map((storage, index) => { const context = storage.getContext('webgpu'); @@ -450,7 +438,7 @@ export class WebGPUBackend extends KernelBackend { const readDataGPUToCPU = (width: number, height: number, offset: number) => { this.ensureCommandEncoderReady(); - this.currentCommandEncoder.copyBufferToTexture( + this.commandEncoder.copyBufferToTexture( { buffer, bytesPerRow, @@ -556,9 +544,8 @@ export class WebGPUBackend extends KernelBackend { const usage = srcBuffer.usage; const dstBuffer = this.bufferManager.acquireBuffer(size, usage); this.ensureCommandEncoderReady(); - this.ensureComputePassEnded(); - this.currentCommandEncoder.copyBufferToBuffer( - srcBuffer, 0, dstBuffer, 0, size); + this.endComputePassEncoder(); + this.commandEncoder.copyBufferToBuffer(srcBuffer, 0, dstBuffer, 0, size); this.submitQueue(); return dstBuffer; } @@ -627,8 +614,8 @@ export class WebGPUBackend extends KernelBackend { const usage = srcBuffer.usage; const buffer = this.bufferManager.acquireBuffer(size, usage); this.ensureCommandEncoderReady(); - this.ensureComputePassEnded(); - this.currentCommandEncoder.copyBufferToBuffer( + this.endComputePassEncoder(); + this.commandEncoder.copyBufferToBuffer( resource as GPUBuffer, 0, buffer, 0, size); this.submitQueue(); @@ -660,15 +647,16 @@ export class WebGPUBackend extends KernelBackend { } override async time(f: () => void): Promise { - if (!this.supportTimeQuery) { + if (!this.supportTimestampQuery && !this.hasTimestampQueryWarned) { console.warn( - `This device doesn't support timestamp-query-inside-passes extension. ` + + `This device doesn't support timestamp-query extension. ` + `Start Chrome browser with flag ` + - `--disable-dawn-features=disallow_unsafe_apis then try again. ` + + `--disable-dawn-features=disallow_unsafe_apis to try it again. ` + `Otherwise, zero will be shown for the kernel time when profiling ` + - `mode is enabled. Using performance.now is not workable for webgpu ` + - `since it doesn't support synchronous data read from GPU.`); + `mode is enabled.`); + this.hasTimestampQueryWarned = true; } + const oldActiveTimers = this.activeTimers; const newActiveTimers: TimerNode[] = []; @@ -742,14 +730,6 @@ export class WebGPUBackend extends KernelBackend { return resource; } - async getQueryTime(query: GPUQuerySet): Promise { - if (this.supportTimeQuery) { - return this.getTimeFromQuerySet(query); - } else { - return 0; - } - } - uploadToGPU(dataId: DataId): void { const tensorData = this.tensorMap.get(dataId); // Already on the GPU. 
@@ -776,8 +756,8 @@ export class WebGPUBackend extends KernelBackend { } stagingBuffer.unmap(); this.ensureCommandEncoderReady(); - this.ensureComputePassEnded(); - this.currentCommandEncoder.copyBufferToBuffer( + this.endComputePassEncoder(); + this.commandEncoder.copyBufferToBuffer( stagingBuffer, 0, buffer, 0, size); this.stagingPendingDisposal.push(stagingBuffer); @@ -966,57 +946,85 @@ export class WebGPUBackend extends KernelBackend { layout: program.pipeline.getBindGroupLayout(0), entries: bindings.map((b, i) => ({binding: i, resource: b})), }); - this.ensureCommandEncoderReady(); - const pass = this.getComputePass(); const shouldTimeProgram = this.activeTimers != null; - if (shouldTimeProgram && this.supportTimeQuery) { - // tslint:disable-next-line:no-any - (pass as any).writeTimestamp(this.querySet, 0); + this.ensureCommandEncoderReady(); + + if (!this.computePassEncoder) { + const computePassDescriptor: GPUComputePassDescriptor = {}; + if (shouldTimeProgram && this.supportTimestampQuery) { + if (this.querySet == null) { + this.querySet = this.device.createQuerySet({ + type: 'timestamp', + count: this.querySetCount, + }); + } + computePassDescriptor.timestampWrites = [ + { + querySet: this.querySet, + queryIndex: 0, + location: 'beginning', + }, + { + querySet: this.querySet, + queryIndex: 1, + location: 'end', + } + ]; + } + this.computePassEncoder = + this.commandEncoder.beginComputePass(computePassDescriptor); } - pass.setPipeline(program.pipeline); - pass.setBindGroup(0, bindGroup); - pass.dispatchWorkgroups( + this.computePassEncoder.setPipeline(program.pipeline); + this.computePassEncoder.setBindGroup(0, bindGroup); + this.computePassEncoder.dispatchWorkgroups( program.dispatch[0], program.dispatch[1], program.dispatch[2]); - - if (shouldTimeProgram && this.supportTimeQuery) { - // tslint:disable-next-line:no-any - (pass as any).writeTimestamp(this.querySet, 1); + this.dispatchCountInPass++; + + if (shouldTimeProgram || + env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE') as + number <= this.dispatchCountInPass) { + this.endComputePassEncoder(); + if (shouldTimeProgram) { + this.activeTimers.push( + {name: program.constructor.name, query: this.getQueryTime()}); + } else { + this.submitQueue(); + } } - this.dispatchNumberInEncoder++; + } - if (env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE') as - number <= this.dispatchNumberInEncoder) { - this.submitQueue(); + async getQueryTime(): Promise { + if (!this.supportTimestampQuery) { + return 0; } - if (shouldTimeProgram) { - this.activeTimers.push({ - name: program.constructor.name, - query: this.getQueryTime(this.querySet) - }); + + if (this.queryResolveBuffer == null) { + this.queryResolveBuffer = this.bufferManager.acquireBuffer( + this.querySetCount * 8, + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | + GPUBufferUsage.QUERY_RESOLVE); } - } + this.commandEncoder.resolveQuerySet( + this.querySet, 0, this.querySetCount, this.queryResolveBuffer, 0); - async getTimeFromQuerySet(querySet: GPUQuerySet) { - const queryBuffer = this.bufferManager.acquireBuffer( - 16, GPUBufferUsage.COPY_SRC | GPUBufferUsage.QUERY_RESOLVE); - const dst = this.bufferManager.acquireBuffer( - 16, GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST); + const queryStagingBuffer = this.bufferManager.acquireBuffer( + this.querySetCount * 8, + GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST); + + this.commandEncoder.copyBufferToBuffer( + this.queryResolveBuffer, 0, queryStagingBuffer, 0, + this.querySetCount * 8); - this.ensureCommandEncoderReady(); - 
this.ensureComputePassEnded(); - this.currentCommandEncoder.resolveQuerySet(querySet, 0, 2, queryBuffer, 0); - this.currentCommandEncoder.copyBufferToBuffer(queryBuffer, 0, dst, 0, 16); this.submitQueue(); - await dst.mapAsync(GPUMapMode.READ); - const arrayBuf = new BigUint64Array(dst.getMappedRange()); - const timeElapsedNanos = Number((arrayBuf[1] - arrayBuf[0])); - dst.unmap(); - this.bufferManager.releaseBuffer(dst); - this.bufferManager.releaseBuffer(queryBuffer); - // Return milliseconds. - return timeElapsedNanos / 1000000; + + await queryStagingBuffer.mapAsync(GPUMapMode.READ); + const arrayBuffer = new BigUint64Array(queryStagingBuffer.getMappedRange()); + const time = Number(arrayBuffer[1] - arrayBuffer[0]) / 1000000; + queryStagingBuffer.unmap(); + this.bufferManager.releaseBuffer(queryStagingBuffer); + return time; } shouldExecuteOnCPU( @@ -1036,6 +1044,9 @@ export class WebGPUBackend extends KernelBackend { if (this.disposed) { return; } + if (this.querySet != null) { + this.querySet.destroy(); + } this.bufferManager.dispose(); this.textureManager.dispose(); this.disposed = true; diff --git a/tfjs-backend-webgpu/src/base.ts b/tfjs-backend-webgpu/src/base.ts index be71afb2d2d..8f4ad52877c 100644 --- a/tfjs-backend-webgpu/src/base.ts +++ b/tfjs-backend-webgpu/src/base.ts @@ -33,14 +33,8 @@ if (isWebGPUSupported()) { const adapter = await navigator.gpu.requestAdapter(gpuDescriptor); const deviceDescriptor: GPUDeviceDescriptor = {}; - // Note that timestamp-query-inside-passes is not formally in spec as - // timestamp within a pass is not generally supported on all the platforms. - // More details can be found at - // https://github.com/gpuweb/gpuweb/blob/main/proposals/timestamp-query-inside-passes.md - if (adapter.features.has('timestamp-query-inside-passes')) { - deviceDescriptor.requiredFeatures = - // tslint:disable-next-line:no-any - ['timestamp-query-inside-passes' as any]; + if (adapter.features.has('timestamp-query')) { + deviceDescriptor.requiredFeatures = ['timestamp-query']; } const adapterLimits = adapter.limits; diff --git a/tfjs-core/yarn.lock b/tfjs-core/yarn.lock index 10bf8b85753..9bb5eb1c6ec 100644 --- a/tfjs-core/yarn.lock +++ b/tfjs-core/yarn.lock @@ -32,10 +32,10 @@ resolved "https://registry.yarnpkg.com/@types/webgl-ext/-/webgl-ext-0.0.30.tgz#0ce498c16a41a23d15289e0b844d945b25f0fb9d" integrity sha512-LKVgNmBxN0BbljJrVUwkxwRYqzsAEPcZOe6S2T6ZaBDIrFp0qu4FNlpc5sM1tGbXUYFgdVQIoeLk1Y1UoblyEg== -"@webgpu/types@0.1.21": - version "0.1.21" - resolved "https://registry.yarnpkg.com/@webgpu/types/-/types-0.1.21.tgz#b181202daec30d66ccd67264de23814cfd176d3a" - integrity sha512-pUrWq3V5PiSGFLeLxoGqReTZmiiXwY3jRkIG5sLLKjyqNxrwm/04b4nw7LSmGWJcKk59XOM/YRTUwOzo4MMlow== +"@webgpu/types@0.1.30": + version "0.1.30" + resolved "https://registry.npmjs.org/@webgpu/types/-/types-0.1.30.tgz#b6406dc4a1c1e0d469028ceb30ddffbbd2fa706c" + integrity sha512-9AXJSmL3MzY8ZL//JjudA//q+2kBRGhLBFpkdGksWIuxrMy81nFrCzj2Am+mbh8WoU6rXmv7cY5E3rdlyru2Qg== long@4.0.0: version "4.0.0" diff --git a/yarn.lock b/yarn.lock index a52b1eb6180..29251ec4a1c 100644 --- a/yarn.lock +++ b/yarn.lock @@ -520,7 +520,7 @@ "@webgpu/types@0.1.30": version "0.1.30" - resolved "https://registry.npmmirror.com/@webgpu/types/-/types-0.1.30.tgz#b6406dc4a1c1e0d469028ceb30ddffbbd2fa706c" + resolved "https://registry.npmjs.org/@webgpu/types/-/types-0.1.30.tgz#b6406dc4a1c1e0d469028ceb30ddffbbd2fa706c" integrity sha512-9AXJSmL3MzY8ZL//JjudA//q+2kBRGhLBFpkdGksWIuxrMy81nFrCzj2Am+mbh8WoU6rXmv7cY5E3rdlyru2Qg== "@xmldom/xmldom@^0.7.3": 
@@ -4507,7 +4507,7 @@ type@^2.5.0: typescript@4.9.4: version "4.9.4" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.9.4.tgz#a2a3d2756c079abda241d75f149df9d561091e78" + resolved "https://registry.npmjs.org/typescript/-/typescript-4.9.4.tgz#a2a3d2756c079abda241d75f149df9d561091e78" integrity sha512-Uz+dTXYzxXXbsFpM86Wh3dKCxrQqUcVMxwU54orwlJjOpO3ao8L7j5lH+dWfTwgCwIuM9GQ2kvVotzYJMXTBZg== ua-parser-js@^0.7.30: From 428e55b98050cd5840def33758e8471f045fd641 Mon Sep 17 00:00:00 2001 From: Yang Gu Date: Mon, 29 May 2023 15:38:24 +0800 Subject: [PATCH 31/32] webgpu: Fix timestamp query (#7723) If a pass that needs timestamp query follows a pass without timestamp query, querySet may not be created as expected. This PR fixes this issue. --- tfjs-backend-webgpu/src/backend_webgpu.ts | 44 ++++++++++++----------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index f4f89130d37..b21c9be1030 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -950,28 +950,30 @@ export class WebGPUBackend extends KernelBackend { const shouldTimeProgram = this.activeTimers != null; this.ensureCommandEncoderReady(); - if (!this.computePassEncoder) { - const computePassDescriptor: GPUComputePassDescriptor = {}; - if (shouldTimeProgram && this.supportTimestampQuery) { - if (this.querySet == null) { - this.querySet = this.device.createQuerySet({ - type: 'timestamp', - count: this.querySetCount, - }); - } - computePassDescriptor.timestampWrites = [ - { - querySet: this.querySet, - queryIndex: 0, - location: 'beginning', - }, - { - querySet: this.querySet, - queryIndex: 1, - location: 'end', - } - ]; + const computePassDescriptor: GPUComputePassDescriptor = {}; + if (shouldTimeProgram && this.supportTimestampQuery) { + this.endComputePassEncoder(); + if (this.querySet == null) { + this.querySet = this.device.createQuerySet({ + type: 'timestamp', + count: this.querySetCount, + }); } + computePassDescriptor.timestampWrites = [ + { + querySet: this.querySet, + queryIndex: 0, + location: 'beginning', + }, + { + querySet: this.querySet, + queryIndex: 1, + location: 'end', + } + ]; + this.computePassEncoder = + this.commandEncoder.beginComputePass(computePassDescriptor); + } else if (!this.computePassEncoder) { this.computePassEncoder = this.commandEncoder.beginComputePass(computePassDescriptor); } From dc65451c20a87817197d0a565c3a49793095ca11 Mon Sep 17 00:00:00 2001 From: Jiajia Qin Date: Tue, 30 May 2023 15:49:45 +0800 Subject: [PATCH 32/32] webgpu: Move the readSync warning position (#7724) The warning should only happen when there is real data reading from gpu to cpu. 
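A minimal usage sketch of the asynchronous read path that the relocated warning recommends (plain TF.js APIs such as `tf.setBackend` and `tensor.data()`; illustrative only, the behavior change itself is in the diff below):

```js
await tf.setBackend('webgpu');
const x = tf.tensor2d([[1, 2], [3, 4]]);
const y = tf.square(x);
// y.dataSync() would hit the warn-once path guarded by hasReadSyncWarned;
// the asynchronous read below avoids the synchronous GPU-to-CPU round trip.
const vals = await y.data();
console.log(vals);
x.dispose();
y.dispose();
```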
--- tfjs-backend-webgpu/src/backend_webgpu.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index b21c9be1030..9b5faa9f345 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -377,13 +377,6 @@ export class WebGPUBackend extends KernelBackend { } override readSync(dataId: object): BackendValues { - if (!this.hasReadSyncWarned) { - this.hasReadSyncWarned = true; - console.warn( - `The performance of synchronously reading data from GPU to CPU is ` + - `poor on the webgpu backend, please use asynchronous APIs instead.`); - } - const tensorData = this.tensorMap.get(dataId); const {values, complexTensorInfos} = tensorData; @@ -403,6 +396,13 @@ export class WebGPUBackend extends KernelBackend { return complexVals; } + if (!this.hasReadSyncWarned) { + this.hasReadSyncWarned = true; + console.warn( + `The performance of synchronously reading data from GPU to CPU is ` + + `poor on the webgpu backend, please use asynchronous APIs instead.`); + } + const alphaModes: GPUCanvasAlphaMode[] = ['opaque', 'premultiplied']; const buffer = tensorData.resource as GPUBuffer;
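Together with the timestamp-query changes in patches 30/32 and 31/32, per-kernel times surface through the standard profiling entry point. A minimal sketch, assuming Chrome is launched with --disable-dawn-features=disallow_unsafe_apis as noted in patch 30/32 (otherwise kernelMs is reported as zero):

```js
await tf.setBackend('webgpu');
const a = tf.randomNormal([1024, 1024]);
const b = tf.randomNormal([1024, 1024]);
// tf.time() routes through the backend's time() implementation, which now
// resolves the timestamp querySet to compute the kernel time.
const info = await tf.time(() => a.matMul(b));
console.log(`wallMs: ${info.wallMs}, kernelMs: ${info.kernelMs}`);
a.dispose();
b.dispose();
```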