// Copyright 2024 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. // // Provides generic implementations for some SIMD functions, i.e. // implementations that don't rely on architecture-specific instructions and // just use the basic wrapped SIMD intrinsics. #ifndef THIRD_PARTY_XNNPACK_INCLUDE_SIMD_F32_GENERIC_FUNCTIONS_H_ #define THIRD_PARTY_XNNPACK_INCLUDE_SIMD_F32_GENERIC_FUNCTIONS_H_ #include #include #include #include "xnnpack/common.h" // Forward declarations of basic SIMD intrinsics. static XNN_INLINE xnn_simd_f32_t xnn_sub_f32(xnn_simd_f32_t a, xnn_simd_f32_t b); static XNN_INLINE xnn_simd_f32_t xnn_and_f32(xnn_simd_f32_t a, xnn_simd_f32_t b); static XNN_INLINE xnn_simd_f32_t xnn_or_f32(xnn_simd_f32_t a, xnn_simd_f32_t b); #ifndef XNN_SIMD_SHIFTS_ARE_MACROS static XNN_INLINE xnn_simd_f32_t xnn_srl_f32(xnn_simd_f32_t a, uint8_t b); #endif // XNN_SIMD_SHIFTS_ARE_MACROS // Extracts the exponent of the input `a` as a `float` value. static XNN_INLINE xnn_simd_f32_t xnn_generic_getexp_f32(xnn_simd_f32_t a) { // Some useful constants. XNN_SIMD_CONST_F32_FROM_INT32(exp_mask, 0x7f800000); XNN_SIMD_CONST_F32(bias_256, 256.0f); XNN_SIMD_CONST_F32(bias_383, 383.0f); // The bits of IEE754 single-precision floating-point format are: // // s | e e e e e e e e | m m m m m m m m m m m m m m m m m m m m m m m // // We start by masking out the exponent and shifting it 8 bits to the right: // // 0 | 0 0 0 0 0 0 0 0 | e e e e e e e e 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 // // These bits are then `or`-ed with `256.0f`, which has a biased exponent of // `135` and all mantissa bit set to zero. This is equivalent to adding the // biased integer exponent to `256.0`: // // 0 | 1 0 0 0 0 1 1 1 | e e e e e e e e 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 // // We can re-extract the exponent as a `float` value by subtracting `256.0` // plus the exponent bias `127.0`, i.e. `383.0`. // Extract the exponent and shift the exponent to the most significant bits of // the mantissa. const xnn_simd_f32_t exp = xnn_srl_f32(xnn_and_f32(a, exp_mask), 8); // Add the shifted exponent to `256.0f` by copying its bits to the mantissa, // then subtract out `383.0f`, i.e. the original `256.0f` plus the `127` // exponent bias, resulting in the unbiased exponent. return xnn_sub_f32(xnn_or_f32(bias_256, exp), bias_383); } #endif // THIRD_PARTY_XNNPACK_INCLUDE_SIMD_F32_GENERIC_FUNCTIONS_H_