migrate jarvis quant-per-tensor hifi ops to oss (#6293)
Summary:
Pull Request resolved: #6293

- Only **quant-per-tensor** is migrated in this diff; the remaining three ops will follow in the stack.
- Solved the `--start-lib`/`--end-lib` "option not recognized" error here: the libs need to be imported from //executorch. It has nothing to do with the cxx wrapper in Buck.
- Aligned the namespaces to **cadence::impl::HiFi::native** and **cadence::impl::HiFi::kernels** (see the sketch below).
- kernels to be removed after all ops are migrated.
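
As a rough sketch of the layout this alignment produces (illustrative only; the helper signature is approximate and all bodies are elided), shared kernels land under cadence::impl::HiFi::kernels and operator entry points under cadence::impl::HiFi::native:

// Sketch of the aligned namespace layout (illustrative; parameter types are
// approximate and function bodies are elided).
namespace cadence {
namespace impl {
namespace HiFi {
namespace kernels {

// Shared quantize/dequantize helpers (declared in kernels.h below).
template <typename T>
void quantize(
    T* out, const float* in, float inv_scale, int32_t zero_point, size_t numel);

} // namespace kernels

namespace native {

// Operator entry points such as quantize_per_tensor_out live here and call
// into cadence::impl::HiFi::kernels.

} // namespace native
} // namespace HiFi
} // namespace impl
} // namespace cadence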

Reviewed By: skrtskrtfb, mcremon-meta

Differential Revision: D64194227

fbshipit-source-id: 180ed472d9d9c5139064eb2e9b47480d80598f9d
zonglinpeng authored and facebook-github-bot committed Oct 18, 2024
1 parent fad26af commit b1c94ab
Showing 9 changed files with 32 additions and 13 deletions.
8 changes: 4 additions & 4 deletions backends/cadence/aot/functions_hifi.yaml
@@ -107,21 +107,21 @@
   variants: function
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantize_per_tensor_out
+      kernel_name: cadence::impl::HiFi::quantize_per_tensor_out

 - func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::dequantize_per_tensor_out
+      kernel_name: cadence::impl::HiFi::dequantize_per_tensor_out


 - func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_layer_norm_out
+      kernel_name: cadence::impl::HiFi::quantized_layer_norm_out

 - func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_linear_out
+      kernel_name: cadence::impl::HiFi::quantized_linear_out
2 changes: 2 additions & 0 deletions backends/cadence/hifi/kernels/kernels.cpp
@@ -10,6 +10,7 @@
 #include <xa_nnlib_common.h>
 #include <xa_nnlib_common_macros.h>

+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace kernels {
@@ -231,3 +232,4 @@ typed_requantize_vec(uint8_t, int8_t);
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
2 changes: 2 additions & 0 deletions backends/cadence/hifi/kernels/kernels.h
@@ -12,6 +12,7 @@
 #include <stddef.h>
 #include <xa_type_def.h>

+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace kernels {
@@ -63,3 +64,4 @@ void dequantize(
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
2 changes: 2 additions & 0 deletions backends/cadence/hifi/operators/dequantize_per_tensor.cpp
@@ -10,6 +10,7 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <xa_nnlib_kernels_api.h>

+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -50,3 +51,4 @@ void dequantize_per_tensor_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
18 changes: 12 additions & 6 deletions backends/cadence/hifi/operators/quantize_per_tensor.cpp
@@ -10,6 +10,7 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <xa_nnlib_kernels_api.h>

+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -21,28 +22,32 @@ using executorch::runtime::KernelRuntimeContext;
 // Quantize the input tensor (PT2 version). Note that quant_<min,max> are not
 // used in any computation.
 void quantize_per_tensor_out(
-    KernelRuntimeContext& context,
+    KernelRuntimeContext& ctx,
     const Tensor& input,
     double scale,
     int64_t zero_point,
-    int64_t quant_min,
-    int64_t quant_max,
+    __ET_UNUSED int64_t quant_min,
+    __ET_UNUSED int64_t quant_max,
     ScalarType dtype,
     Tensor& out) {
   const float* input_data = input.const_data_ptr<float>();
-  size_t numel = out.numel();
+  const size_t numel = out.numel();

   if (out.scalar_type() == ScalarType::Byte) {
     uint8_t* out_data = out.mutable_data_ptr<uint8_t>();
-    impl::HiFi::kernels::quantize<uint8_t>(
+    cadence::impl::HiFi::kernels::quantize<uint8_t>(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Char) {
     int8_t* out_data = out.mutable_data_ptr<int8_t>();
     xa_nn_elm_quantize_f32_asym8s(
         out_data, input_data, scale, zero_point, numel);
+  } else if (out.scalar_type() == ScalarType::Short) {
+    int16_t* out_data = out.mutable_data_ptr<int16_t>();
+    cadence::impl::HiFi::kernels::quantize<int16_t>(
+        out_data, input_data, 1. / scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Int) {
     int32_t* out_data = out.mutable_data_ptr<int32_t>();
-    impl::HiFi::kernels::quantize<int32_t>(
+    cadence::impl::HiFi::kernels::quantize<int32_t>(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else {
     ET_CHECK_MSG(false, "Unhandled input dtype %hhd", out.scalar_type());
@@ -52,3 +57,4 @@ void quantize_per_tensor_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
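
For intuition, here is a minimal standalone sketch of the per-tensor affine quantization that those kernels::quantize<T> calls perform (a sketch under stated assumptions: round-to-nearest and saturation to T's range; the actual HiFi kernel may use a different rounding mode, and quantize_sketch is a hypothetical name):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>

// Minimal sketch mirroring the (out, in, inv_scale, zero_point, numel) call
// shape seen above. Assumes round-to-nearest and saturation to T's range;
// the real kernel's rounding and saturation behavior may differ.
template <typename T>
void quantize_sketch(
    T* out,
    const float* in,
    float inv_scale,
    int32_t zero_point,
    size_t numel) {
  constexpr int32_t kMin = std::numeric_limits<T>::min();
  constexpr int32_t kMax = std::numeric_limits<T>::max();
  for (size_t i = 0; i < numel; ++i) {
    // q = clamp(round(x / scale) + zero_point, T_min, T_max)
    const int32_t q =
        static_cast<int32_t>(std::nearbyintf(in[i] * inv_scale)) + zero_point;
    out[i] = static_cast<T>(std::min(kMax, std::max(kMin, q)));
  }
}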
6 changes: 4 additions & 2 deletions backends/cadence/hifi/operators/quantized_layer_norm.cpp
@@ -16,6 +16,7 @@ using executorch::aten::Tensor;
 using executorch::runtime::getLeadingDims;
 using executorch::runtime::KernelRuntimeContext;

+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -76,10 +77,10 @@ void quantized_layer_norm_(
     for (size_t j = 0; j < last_dim; ++j) {
       // Since X is quantized, we dequantize it, compute fp32 result, and
       // quantize the result to an int8/uint8 value.
-      float val = impl::HiFi::kernels::dequantize<T>(
+      float val = cadence::impl::HiFi::kernels::dequantize<T>(
           x[j], input_scale, input_zero_point);
       val = (val - mean) * inv_std * weight_data[j] + bias_data[j];
-      y[j] = impl::HiFi::kernels::quantize<T>(
+      y[j] = cadence::impl::HiFi::kernels::quantize<T>(
           val, output_inv_scale, output_zero_point);
     }
   }
@@ -157,3 +158,4 @@ void quantized_layer_norm_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
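
The inner loop above is the standard dequantize, normalize, requantize pattern. A freestanding sketch of the same math over one row (hypothetical function and parameter names; the real code uses the kernels::dequantize/quantize templates shown in the diff and computes mean/variance before this loop):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Sketch of quantized layer norm over one row; hypothetical scalar code.
// Dequantize: f = (q - in_zp) * in_scale.
// Requantize: q = round(f * out_inv_scale) + out_zp, clamped to int8.
std::vector<int8_t> quantized_layer_norm_row_sketch(
    const std::vector<int8_t>& x,
    float in_scale,
    int32_t in_zp,
    const std::vector<float>& weight,
    const std::vector<float>& bias,
    float eps,
    float out_inv_scale,
    int32_t out_zp) {
  // First pass: mean and variance of the dequantized row, in fp32.
  float sum = 0.f;
  float sum_sq = 0.f;
  for (int8_t q : x) {
    const float f = (q - in_zp) * in_scale;
    sum += f;
    sum_sq += f * f;
  }
  const float mean = sum / x.size();
  const float inv_std = 1.f / std::sqrt(sum_sq / x.size() - mean * mean + eps);

  // Second pass: normalize, apply affine weight/bias, requantize.
  std::vector<int8_t> y(x.size());
  for (size_t j = 0; j < x.size(); ++j) {
    float val = (x[j] - in_zp) * in_scale;
    val = (val - mean) * inv_std * weight[j] + bias[j];
    const int32_t q =
        static_cast<int32_t>(std::nearbyintf(val * out_inv_scale)) + out_zp;
    y[j] = static_cast<int8_t>(std::min(127, std::max(-128, q)));
  }
  return y;
}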
4 changes: 3 additions & 1 deletion backends/cadence/hifi/operators/quantized_linear_out.cpp
@@ -11,6 +11,7 @@
 #include <algorithm>
 #include <cmath>

+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -45,7 +46,7 @@ void quantized_linear_out(
   uint8_t* __restrict__ out_data = out.mutable_data_ptr<uint8_t>();

   // The nnlib kernel to compute quantized linear via matmul.
-  int32_t ret = impl::HiFi::kernels::matmul_asym8uxasym8u_asym8u(
+  int32_t ret = cadence::impl::HiFi::kernels::matmul_asym8uxasym8u_asym8u(
       out_data, // p_out
       weight_data, // p_mat1,
       in_data, // p_mat2,
@@ -69,3 +70,4 @@
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
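
For background on the matmul call above, asymmetric uint8 quantized linear accumulates zero-point-shifted products in int32 and then requantizes each accumulator into the output domain. A reference-style sketch (hypothetical names; a per-tensor weight zero point is assumed, and a single float requant_scale stands in for the fixed-point out_multiplier/out_shift requantization the nnlib kernel actually uses):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Reference sketch of asymmetric uint8 quantized linear: out = src * W^T + b.
// Hypothetical scalar code; the HiFi kernel performs the same math with a
// fixed-point (out_multiplier, out_shift) requantization step.
void quantized_linear_sketch(
    const uint8_t* src,      // m x k
    const uint8_t* weight,   // n x k
    const int32_t* bias,     // n
    int32_t src_zero_point,
    int32_t weight_zero_point,
    float requant_scale,
    int32_t out_zero_point,
    int m, int k, int n,
    uint8_t* out) {          // m x n
  for (int i = 0; i < m; ++i) {
    for (int j = 0; j < n; ++j) {
      int32_t acc = bias[j];
      for (int p = 0; p < k; ++p) {
        // Accumulate zero-point-shifted products in int32.
        acc += (src[i * k + p] - src_zero_point) *
            (weight[j * k + p] - weight_zero_point);
      }
      // Requantize the accumulator into the uint8 output domain.
      const int32_t q =
          static_cast<int32_t>(std::nearbyintf(acc * requant_scale)) +
          out_zero_point;
      out[i * n + j] = static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }
  }
}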
1 change: 1 addition & 0 deletions backends/cadence/hifi/operators/targets.bzl
@@ -26,5 +26,6 @@ def define_common_targets():
         ],
         visibility = [
             "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
         ],
     )
@@ -43,6 +43,7 @@

 /*----------------------------Main function---------------------------------*/

+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace kernels {
@@ -436,3 +437,4 @@ WORD32 matmul_asym8uxasym8u_asym8u(
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
