add dynamic quantize gemm benchmark [step 3: int8 gemm compute] (#2271)
Summary:
Pull Request resolved: #2271

This diff adds support for the `qlinear_channelwise` operation in the FX backend for Freya and Artemis. The kernel implementation is left as a placeholder.
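Once this fragment is registered, the op is reachable from C++ through the PyTorch dispatcher. A minimal caller-side sketch follows (the wrapper name `call_qlinear_channelwise` is a hypothetical helper, not part of this diff; with the placeholder kernel the call simply returns `x`):

#include <ATen/ATen.h>
#include <ATen/core/dispatch/Dispatcher.h>

// Hypothetical helper: looks up fbgemm::qlinear_channelwise in the
// dispatcher and invokes it with the seven tensor arguments its schema
// declares. The placeholder kernel currently returns `x` unchanged.
at::Tensor call_qlinear_channelwise(
    const at::Tensor& x,
    const at::Tensor& weight,
    const at::Tensor& bias,
    const at::Tensor& input_scale,
    const at::Tensor& weight_scale,
    const at::Tensor& weight_zero_point,
    const at::Tensor& relu) {
  static auto op =
      c10::Dispatcher::singleton()
          .findSchemaOrThrow("fbgemm::qlinear_channelwise", "")
          .typed<at::Tensor(
              at::Tensor,
              at::Tensor,
              at::Tensor,
              at::Tensor,
              at::Tensor,
              at::Tensor,
              at::Tensor)>();
  return op.call(
      x, weight, bias, input_scale, weight_scale, weight_zero_point, relu);
}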

Reviewed By: archishman

Differential Revision: D50437264

fbshipit-source-id: dc13582bb8538ee34d53c302938bccd23fcd7e1a
Jiawei Zhang authored and facebook-github-bot committed Jan 19, 2024
1 parent 9a3c5b2 commit c3000d8
Showing 1 changed file with 35 additions and 0 deletions.
35 changes: 35 additions & 0 deletions fbgemm_gpu/src/qlinear_channelwise/qlinear_channelwise_mtia.cpp
@@ -0,0 +1,35 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <ATen/ATen.h>
#include <ATen/core/op_registration/op_registration.h>
#include <torch/library.h>

static at::Tensor qlinear_channelwise(
    at::Tensor x,
    at::Tensor weight,
    at::Tensor bias,
    at::Tensor input_scale,
    at::Tensor weight_scale,
    at::Tensor weight_zero_point,
    at::Tensor relu) {
  // Quantized linear function with
  //   activation: per-tensor quantization
  //   weight: per-channel quantization
  // The kernel implementation is left as a placeholder for now: the input
  // is returned unchanged.
  return x;
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
  m.def(
      "qlinear_channelwise(Tensor x, Tensor weight, Tensor "
      "bias, Tensor input_scale, Tensor weight_scale, Tensor "
      "weight_zero_point, Tensor relu) -> Tensor");
  m.impl(
      "qlinear_channelwise",
      torch::dispatch(c10::DispatchKey::CPU, TORCH_FN(qlinear_channelwise)));
}
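For reference, the comment block above implies per-tensor activation scaling and per-channel weight quantization. Below is a minimal CPU sketch of those semantics (an assumption for illustration only; it is not the Freya/Artemis kernel, which this diff deliberately leaves as a placeholder):

#include <ATen/ATen.h>

// Reference-only sketch: dequantize int8 activations with a per-tensor
// scale and int8 weights with per-channel scales/zero-points, then run a
// float linear with an optional ReLU. Shapes and dtypes are assumptions.
static at::Tensor qlinear_channelwise_reference(
    const at::Tensor& x,                  // [M, K] int8 activations
    const at::Tensor& weight,             // [N, K] int8 weights
    const at::Tensor& bias,               // [N] float bias
    const at::Tensor& input_scale,        // scalar activation scale
    const at::Tensor& weight_scale,       // [N] per-channel scales
    const at::Tensor& weight_zero_point,  // [N] per-channel zero points
    const at::Tensor& relu) {             // scalar bool: apply ReLU
  at::Tensor x_fp = x.to(at::kFloat) * input_scale;
  at::Tensor w_fp =
      (weight.to(at::kFloat) - weight_zero_point.unsqueeze(1)) *
      weight_scale.unsqueeze(1);
  at::Tensor y = at::linear(x_fp, w_fp, bias);
  if (relu.item<bool>()) {
    y = at::relu(y);
  }
  return y;
}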
