// Copyright 2022 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #pragma once #include #include #include "xnnpack.h" #include "xnnpack/common.h" #include "xnnpack/math.h" // Default: serves to differentiate pointer types for micro-kernels without fused activation. struct xnn_f16_default_params { char _; // Dummy member variable to comply with the C standard }; struct xnn_bf16_default_params { char _; // Dummy member variable to comply with the C standard }; struct xnn_f32_default_params { char _; // Dummy member variable to comply with the C standard }; struct xnn_s32_default_params { char _; // Dummy member variable to comply with the C standard }; // ReLU: serves to differentiate pointer types for micro-kernels with fused ReLU activation. struct xnn_f32_relu_params { char _; // Dummy member variable to comply with the C standard }; // Scale: used by RSUM microkernels struct xnn_f16_scale_params { struct { xnn_float16 scale; } scalar; }; struct xnn_f16_f32acc_scale_params { struct { float scale; } scalar; }; struct xnn_f32_scale_params { struct { float scale; } scalar; }; // Scale+Min+Max: used by AVGPOOL microkernels. struct xnn_f16_scaleminmax_params { struct { xnn_float16 scale; xnn_float16 min; xnn_float16 max; } scalar; }; struct xnn_f32_scaleminmax_params { struct { float scale; float min; float max; } scalar; }; // Min+Max: used by VCLAMP and GEMM/IGEMM/DWCONV/MAXPOOL/etc with MINMAX activation. 
// Min/max bounds, nested in a `scalar` sub-struct. The variants below differ
// in the element type of the bounds and in the extra member some of them carry.
struct xnn_bf16_minmax_params {
  struct {
    float min;
    float max;
  } scalar;
};

// Declared as a union (with a single `scalar` member), unlike the bf16 struct
// above; the `union` tag is part of the type name callers must use.
union xnn_f16_minmax_params {
  struct {
    xnn_float16 min;
    xnn_float16 max;
  } scalar;
};

// Declared as a union with a single `scalar` member.
union xnn_f32_minmax_params {
  struct {
    float min;
    float max;
  } scalar;
};

struct xnn_f16_qc4w_minmax_params {
  struct {
    xnn_float16 min;
    xnn_float16 max;
  } scalar;
};

struct xnn_f16_qb4w_minmax_params {
  struct {
    xnn_float16 min;
    xnn_float16 max;
    size_t blocksize;
  } scalar;
};

struct xnn_f32_qc4w_minmax_params {
  struct {
    float min;
    float max;
    int32_t kernel_zero_point;
  } scalar;
};

struct xnn_f32_qb4w_minmax_params {
  struct {
    float min;
    float max;
    size_t blocksize;
  } scalar;
};

// Note: the bounds are stored as 32-bit integers, not as int8_t.
struct xnn_s8_minmax_params {
  struct {
    int32_t min;
    int32_t max;
  } scalar;
};

// Note: the bounds are stored as 32-bit integers, not as uint8_t.
struct xnn_u8_minmax_params {
  struct {
    uint32_t min;
    uint32_t max;
  } scalar;
};

// Conv w. Min+Max: used by quantized GEMM/IGEMM/DWCONV microkernels with MINMAX activation.

struct xnn_qd8_quantization_params {
  int32_t zero_point;
  float inv_scale;
};

// One member per parameter layout. `fp32_armsimd32` exists only when
// XNN_ARCH_ARM is non-zero; the `*_neon*` members exist only when XNN_ARCH_ARM
// or XNN_ARCH_ARM64 is non-zero.
union xnn_qs8_conv_minmax_params {
  struct {
    float scale;
    int16_t output_zero_point;
    int16_t output_min;
    int16_t output_max;
  } fp32_scalar;
  struct {
    int32_t multiplier;
    uint32_t shift;
    int16_t output_min;
    int16_t output_max;
    int32_t output_zero_point;
    int64_t rounding;
  } rndnu_scalar;
#if XNN_ARCH_ARM
  struct {
    float scale;
    float magic_bias;
    int32_t magic_bias_less_zero_point;
    uint32_t output_min;
    uint32_t output_max;
  } fp32_armsimd32;
#endif  // XNN_ARCH_ARM
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    float scale;
    float magic_bias;
    int32_t magic_bias_less_output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } fp32_neon;
  struct {
    float scale;
    int16_t output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } fp32_neonv8;
  struct {
    int32_t right_pre_shift;
    int32_t multiplier;
    int32_t right_post_shift;
    int16_t output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } rndnu_neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
};

// Same architecture gating as xnn_qs8_conv_minmax_params, but none of the
// members carries a `scale` field and there are no `rndnu` members.
union xnn_qs8_qc8w_conv_minmax_params {
  struct {
    int16_t output_zero_point;
    int16_t output_min;
    int16_t output_max;
  } fp32_scalar;
#if XNN_ARCH_ARM
  struct {
    float magic_bias;
    int32_t magic_bias_less_zero_point;
    uint32_t output_min;
    uint32_t output_max;
  } fp32_armsimd32;
#endif  // XNN_ARCH_ARM
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    float magic_bias;
    int32_t magic_bias_less_output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } fp32_neon;
  // NOTE(review): the bounds are uint8_t here, whereas the sibling fp32_neon
  // member uses int8_t - confirm this is intended.
  struct {
    int16_t output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } fp32_neonv8;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
};

// Unsigned counterpart of the qs8 conv parameters; every member additionally
// carries a kernel zero point (`minus_kernel_zero_point` in fp32_armsimd32).
// In the members that store it as a uint8_t, `padding[3]` follows, so the two
// fields together occupy 4 bytes ahead of the next member.
union xnn_qu8_conv_minmax_params {
  struct {
    int32_t kernel_zero_point;
    float scale;
    int16_t output_zero_point;
    int16_t output_min;
    int16_t output_max;
  } fp32_scalar;
  struct {
    int32_t multiplier;
    uint32_t shift;
    int16_t output_min;
    int16_t output_max;
    int32_t output_zero_point;
    int32_t kernel_zero_point;
    int64_t rounding;
  } rndnu_scalar;
  struct {
    uint8_t kernel_zero_point;
    uint8_t padding[3];
    int32_t left_pre_shift;
    int16_t multiplier;
    int16_t output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } rndnu16_scalar;
#if XNN_ARCH_ARM
  struct {
    float scale;
    float magic_bias;
    uint32_t minus_kernel_zero_point;
    int32_t magic_bias_less_zero_point;
    uint32_t output_min;
    uint32_t output_max;
  } fp32_armsimd32;
#endif  // XNN_ARCH_ARM
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint8_t kernel_zero_point;
    uint8_t padding[3];
    float scale;
    float magic_bias;
    int32_t magic_bias_less_output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } fp32_neon;
  struct {
    uint8_t kernel_zero_point;
    uint8_t padding[3];
    float scale;
    int16_t output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } fp32_neonv8;
  struct {
    uint8_t kernel_zero_point;
    uint8_t padding[3];
    int32_t right_pre_shift;
    int32_t multiplier;
    int32_t right_post_shift;
    int16_t output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } rndnu_neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
};

// Add w. Min+Max: used by quantized VADD[C] microkernels with MINMAX activation.
struct xnn_qs8_add_minmax_params { struct { int8_t a_zero_point; int8_t b_zero_point; int32_t bias; int32_t a_multiplier; int32_t b_multiplier; int32_t shift; int16_t output_zero_point; int8_t output_min; int8_t output_max; } scalar; }; struct xnn_qu8_add_minmax_params { struct { uint8_t a_zero_point; uint8_t b_zero_point; int32_t bias; int32_t a_multiplier; int32_t b_multiplier; int32_t shift; int16_t output_zero_point; uint8_t output_min; uint8_t output_max; } scalar; }; // Mul w. Min+Max: used by quantized VMUL[C] microkernels with MINMAX activation. union xnn_qs8_mul_minmax_params { struct { int8_t a_zero_point; int8_t b_zero_point; float scale; int16_t output_zero_point; int8_t output_min; int8_t output_max; } scalar; #if XNN_ARCH_ARM || XNN_ARCH_ARM64 struct { int8_t a_zero_point; int8_t b_zero_point; int32_t left_pre_shift; int32_t multiplier; int32_t left_post_shift; int16_t output_zero_point; int8_t output_min; int8_t output_max; } rndnu_neon; #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 }; union xnn_qu8_mul_minmax_params { struct { uint8_t a_zero_point; uint8_t b_zero_point; float scale; int16_t output_zero_point; uint8_t output_min; uint8_t output_max; } scalar; #if XNN_ARCH_ARM || XNN_ARCH_ARM64 struct { uint8_t a_zero_point; uint8_t b_zero_point; int32_t left_pre_shift; int32_t multiplier; int32_t left_post_shift; int16_t output_zero_point; uint8_t output_min; uint8_t output_max; } rndnu_neon; #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 }; struct xnn_binary_reference_params { float a_scale; int32_t a_zero_point; float b_scale; int32_t b_zero_point; float inv_output_scale; int32_t output_zero_point; struct xnn_binary_params params; }; union xnn_binary_uparams { struct xnn_qs8_add_minmax_params qs8_addsub; struct xnn_qu8_add_minmax_params qu8_addsub; union xnn_qs8_mul_minmax_params qs8_mul; union xnn_qu8_mul_minmax_params qu8_mul; union xnn_f16_minmax_params f16; union xnn_f32_minmax_params f32; struct xnn_binary_reference_params reference; }; // RSum params 
used by RSUM & RDSUM microkernels. struct xnn_qs8_rsum_params { char _; // Dummy member variable to comply with the C standard }; struct xnn_f32_reduce_params { float scale; }; struct xnn_qs8_reduce_params { int32_t num_elements; float scale; float input_output_scale; int8_t input_zero_point; int8_t output_zero_point; }; struct xnn_qu8_reduce_params { int32_t num_elements; float scale; float input_output_scale; uint8_t input_zero_point; uint8_t output_zero_point; }; struct xnn_reduce_params { union { struct xnn_f32_reduce_params f32; struct xnn_qs8_reduce_params qs8; struct xnn_qu8_reduce_params qu8; }; }; // AvgPool w. Min+Max. struct xnn_qu8_avgpool_minmax_params { struct { int32_t init_bias; float scale; int16_t output_zero_point; int16_t output_min; int16_t output_max; } fp32_scalar; }; // Cvt (Convert): used by VCVT microkernels. struct xnn_f16_qs8_cvt_params { struct { xnn_float16 scale; int16_t output_zero_point; } scalar; }; struct xnn_f16_qu8_cvt_params { struct { xnn_float16 scale; int16_t output_zero_point; } scalar; }; struct xnn_f32_qs8_cvt_params { struct { float scale; int16_t output_zero_point; } scalar; }; struct xnn_f32_qu8_cvt_params { struct { float scale; int16_t output_zero_point; } scalar; }; struct xnn_s32_f32_cvt_params { struct { int32_t zero_point; } scalar; }; struct xnn_qs8_cvt_params { struct { int16_t input_zero_point; int32_t multiplier; int16_t output_zero_point; } scalar; }; struct xnn_qs8_f16_cvt_params { struct { int16_t zero_point; xnn_float16 scale; } scalar; }; struct xnn_qs8_f32_cvt_params { struct { int32_t zero_point; float scale; } scalar; }; struct xnn_qu8_cvt_params { struct { uint16_t input_zero_point; int16_t multiplier; int16_t output_zero_point; } scalar; }; struct xnn_qu8_f32_cvt_params { struct { int32_t zero_point; float scale; } scalar; }; // ELU: used by VELU microkernels. 
struct xnn_f16_elu_params { struct { xnn_float16 prescale; xnn_float16 alpha; xnn_float16 beta; } scalar; }; struct xnn_f32_elu_params { struct { float prescale; float alpha; float beta; } scalar; }; // LReLU (Leaky ReLU): used by VLRELU microkernels. struct xnn_f16_lrelu_params { struct { xnn_float16 slope; } scalar; }; struct xnn_f32_lrelu_params { struct { float slope; } scalar; }; struct xnn_qs8_lrelu_params { struct { int32_t input_zero_point; int32_t positive_multiplier; int32_t negative_multiplier; int32_t output_zero_point; } scalar; }; struct xnn_qu8_lrelu_params { struct { int32_t input_zero_point; int32_t positive_multiplier; int32_t negative_multiplier; int32_t output_zero_point; } scalar; }; struct xnn_qs8_packw_params { int8_t input_zero_point; }; struct xnn_qs8_qc4w_packing_params { int8_t input_zero_point; uint8_t kernel_zero_point; }; struct xnn_x32_packb_params { char _; // Dummy member variable to comply with the C standard }; struct xnn_unary_reference_params { float x_scale; int32_t x_zero_point; float inv_y_scale; int32_t y_zero_point; union xnn_unary_params params; }; union xnn_unary_uparams { struct xnn_f32_qs8_cvt_params f32_qs8_cvt; struct xnn_f32_qu8_cvt_params f32_qu8_cvt; struct xnn_f16_qs8_cvt_params f16_qs8_cvt; struct xnn_qs8_f32_cvt_params qs8_f32_cvt; struct xnn_qu8_f32_cvt_params qu8_f32_cvt; struct xnn_qs8_f16_cvt_params qs8_f16_cvt; struct xnn_s32_f32_cvt_params s32_f32_cvt; struct xnn_qs8_cvt_params qs8_cvt; struct xnn_qu8_cvt_params qu8_cvt; struct xnn_f16_elu_params f16_elu; struct xnn_f32_elu_params f32_elu; struct xnn_f16_lrelu_params f16_lrelu; struct xnn_f32_lrelu_params f32_lrelu; struct xnn_qs8_lrelu_params qs8_lrelu; struct xnn_qu8_lrelu_params qu8_lrelu; union xnn_f32_minmax_params f32_minmax; union xnn_f16_minmax_params f16_minmax; struct xnn_s8_minmax_params s8_minmax; struct xnn_u8_minmax_params u8_minmax; struct xnn_unary_reference_params reference; }; struct subconvolution_params { void* weights; size_t w_stride; 
const void** indirection_buffer; void* output; size_t slice_width; size_t slice_height; size_t indirection_y_stride; size_t indirection_x_stride; // scaled_kernel_size := kernel_size * mr * sizeof(void*). size_t scaled_kernel_size; };