// Auto-generated file. Do not edit! // Template: src/f32-qs8-vcvt/rvv.c.in // Generator: tools/xngen // // Copyright 2024 Imagination Technologies, inc. // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #include #include #include "xnnpack/math.h" #include "xnnpack/vcvt.h" void xnn_f32_qs8_vcvt_ukernel__rvv_u2v( size_t batch, const float* input, int8_t* output, const struct xnn_f32_qs8_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) { assert(batch != 0); assert(batch % sizeof(float) == 0); assert(input != NULL); assert(output != NULL); batch >>= XNN_LOG2_SIZEOF_FLOAT; const float scale = params->scalar.scale; // TODO: Clamp may not be necessary. RISCV spec doesn't say if vncvt saturates... const float output_min_less_zero_point = (float) ((int32_t) -128 - (int32_t) params->scalar.output_zero_point); const float output_max_less_zero_point = (float) ((int32_t) 127 - (int32_t) params->scalar.output_zero_point); const int32_t output_zero_point = params->scalar.output_zero_point; for (; batch > 0; ) { const int32_t n = __riscv_vsetvl_e32m2(batch); batch -= n; vfloat32m2_t x_f32v = __riscv_vle32_v_f32m2(input, n); input += n; x_f32v = __riscv_vfmul_vf_f32m2(x_f32v, scale, n); x_f32v = __riscv_vfmax_vf_f32m2(x_f32v, output_min_less_zero_point, n); x_f32v = __riscv_vfmin_vf_f32m2(x_f32v, output_max_less_zero_point, n); vint32m2_t y_i32v = __riscv_vfcvt_x_f_v_i32m2(x_f32v, n); y_i32v = __riscv_vadd_vx_i32m2(y_i32v, output_zero_point, n); __riscv_vse8_v_i8mf2(output, __riscv_vncvt_x_x_w_i8mf2(__riscv_vncvt_x_x_w_i16m1(y_i32v, n), n), n); output += n; } }