// Copyright Naoki Shibata and contributors 2010 - 2023.
//   Distributed under the Boost Software License, Version 1.0.
//   (See accompanying file LICENSE.txt or copy at
//   http://www.boost.org/LICENSE_1_0.txt)

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <math.h>
#include <float.h>

#include <openssl/evp.h>

#include "sleef.h"
#include "misc.h"
#include "testerutil.h"

#ifdef __VSX__
#include <altivec.h>
#undef vector
#undef bool
typedef __vector double __vector_double;
typedef __vector float __vector_float;
#endif

#if defined(__VX__) && defined(__VEC__)
#ifndef SLEEF_VECINTRIN_H_INCLUDED
#include <vecintrin.h>
#define SLEEF_VECINTRIN_H_INCLUDED
#endif
typedef __attribute__((vector_size(16))) double vector_double;
typedef __attribute__((vector_size(16))) float vector_float;
#endif

//

#define XNAN  (((union { int64_t u; double d; }) { .u = INT64_C(0xffffffffffffffff) }).d)
#define XNANf (((union { int32_t u; float d; })  { .u = 0xffffffff }).d)

// Collapse every NaN to one canonical bit pattern so that the MD5
// digests computed below are reproducible across implementations.
static INLINE double unifyValue(double x) { x = !(x == x) ? XNAN : x; return x; }
static INLINE float unifyValuef(float x) { x = !(x == x) ? XNANf : x; return x; }

// For each vector ABI, set<TYPE>(d, r) stores d into lane (r & mask) of a
// vector whose other lanes are random, and get<TYPE>(v, r) reads that lane back.
static INLINE double setdouble(double d, int r) { return d; }
static INLINE double getdouble(double v, int r) { return unifyValue(v); }
static INLINE float setfloat(float d, int r) { return d; }
static INLINE float getfloat(float v, int r) { return unifyValuef(v); }

#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
static INLINE __m128d set__m128d(double d, int r) { static double a[2]; memrand(a, sizeof(a)); a[r & 1] = d; return _mm_loadu_pd(a); }
static INLINE double get__m128d(__m128d v, int r) { static double a[2]; _mm_storeu_pd(a, v); return unifyValue(a[r & 1]); }
static INLINE __m128 set__m128(float d, int r) { static float a[4]; memrand(a, sizeof(a)); a[r & 3] = d; return _mm_loadu_ps(a); }
static INLINE float get__m128(__m128 v, int r) { static float a[4]; _mm_storeu_ps(a, v); return unifyValuef(a[r & 3]); }

#if defined(__AVX__)
static INLINE __m256d set__m256d(double d, int r) { static double a[4]; memrand(a, sizeof(a)); a[r & 3] = d; return _mm256_loadu_pd(a); }
static INLINE double get__m256d(__m256d v, int r) { static double a[4]; _mm256_storeu_pd(a, v); return unifyValue(a[r & 3]); }
static INLINE __m256 set__m256(float d, int r) { static float a[8]; memrand(a, sizeof(a)); a[r & 7] = d; return _mm256_loadu_ps(a); }
static INLINE float get__m256(__m256 v, int r) { static float a[8]; _mm256_storeu_ps(a, v); return unifyValuef(a[r & 7]); }
#endif

#if defined(__AVX512F__)
static INLINE __m512d set__m512d(double d, int r) { static double a[8]; memrand(a, sizeof(a)); a[r & 7] = d; return _mm512_loadu_pd(a); }
static INLINE double get__m512d(__m512d v, int r) { static double a[8]; _mm512_storeu_pd(a, v); return unifyValue(a[r & 7]); }
static INLINE __m512 set__m512(float d, int r) { static float a[16]; memrand(a, sizeof(a)); a[r & 15] = d; return _mm512_loadu_ps(a); }
static INLINE float get__m512(__m512 v, int r) { static float a[16]; _mm512_storeu_ps(a, v); return unifyValuef(a[r & 15]); }
#endif
#endif // #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
#if defined(__aarch64__) && defined(__ARM_NEON)
static INLINE VECTOR_CC float64x2_t setfloat64x2_t(double d, int r) { double a[2]; memrand(a, sizeof(a)); a[r & 1] = d; return vld1q_f64(a); }
static INLINE VECTOR_CC double getfloat64x2_t(float64x2_t v, int r) { double a[2]; vst1q_f64(a, v); return unifyValue(a[r & 1]); }
static INLINE VECTOR_CC float32x4_t setfloat32x4_t(float d, int r) { float a[4]; memrand(a, sizeof(a)); a[r & 3] = d; return vld1q_f32(a); }
static INLINE VECTOR_CC float getfloat32x4_t(float32x4_t v, int r) { float a[4]; vst1q_f32(a, v); return unifyValuef(a[r & 3]); }
#endif

#ifdef __ARM_FEATURE_SVE
static INLINE svfloat64_t setsvfloat64_t(double d, int r) { double a[svcntd()]; memrand(a, sizeof(a)); a[r & (svcntd()-1)] = d; return svld1_f64(svptrue_b8(), a); }
static INLINE double getsvfloat64_t(svfloat64_t v, int r) { double a[svcntd()]; svst1_f64(svptrue_b8(), a, v); return unifyValue(a[r & (svcntd()-1)]); }
static INLINE svfloat32_t setsvfloat32_t(float d, int r) { float a[svcntw()]; memrand(a, sizeof(a)); a[r & (svcntw()-1)] = d; return svld1_f32(svptrue_b8(), a); }
static INLINE float getsvfloat32_t(svfloat32_t v, int r) { float a[svcntw()]; svst1_f32(svptrue_b8(), a, v); return unifyValuef(a[r & (svcntw()-1)]); }

static svfloat64_t vd2getx_vd_vd2(svfloat64x2_t v) { return svget2_f64(v, 0); }
static svfloat64_t vd2gety_vd_vd2(svfloat64x2_t v) { return svget2_f64(v, 1); }
static svfloat32_t vf2getx_vf_vf2(svfloat32x2_t v) { return svget2_f32(v, 0); }
static svfloat32_t vf2gety_vf_vf2(svfloat32x2_t v) { return svget2_f32(v, 1); }
#endif

#ifdef __VSX__
static INLINE __vector double setSLEEF_VECTOR_DOUBLE(double d, int r) { double a[2]; memrand(a, sizeof(a)); a[r & 1] = d; return vec_vsx_ld(0, a); }
static INLINE double getSLEEF_VECTOR_DOUBLE(__vector double v, int r) { double a[2]; vec_vsx_st(v, 0, a); return unifyValue(a[r & 1]); }
static INLINE __vector float setSLEEF_VECTOR_FLOAT(float d, int r) { float a[4]; memrand(a, sizeof(a)); a[r & 3] = d; return vec_vsx_ld(0, a); }
static INLINE float getSLEEF_VECTOR_FLOAT(__vector float v, int r) { float a[4]; vec_vsx_st(v, 0, a); return unifyValuef(a[r & 3]); }
#endif

#ifdef __VX__
static INLINE __attribute__((vector_size(16))) double setSLEEF_VECTOR_DOUBLE(double d, int r) {
  double a[2]; memrand(a, sizeof(a)); a[r & 1] = d;
  return (__attribute__((vector_size(16))) double) { a[0], a[1] };
}
static INLINE double getSLEEF_VECTOR_DOUBLE(__attribute__((vector_size(16))) double v, int r) { return unifyValue(v[r & 1]); }
static INLINE __attribute__((vector_size(16))) float setSLEEF_VECTOR_FLOAT(float d, int r) {
  float a[4]; memrand(a, sizeof(a)); a[r & 3] = d;
  return (__attribute__((vector_size(16))) float) { a[0], a[1], a[2], a[3] };
}
static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float v, int r) { return unifyValuef(v[r & 3]); }
#endif

#if __riscv && __riscv_v
#if defined(ENABLE_RVVM1)
#define VECTLENSP (1 * __riscv_vlenb() / sizeof(float))
#define VECTLENDP (1 * __riscv_vlenb() / sizeof(double))
static INLINE vfloat32m1_t setvfloat32m1_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m1(a, VECTLENSP); }
static INLINE float getvfloat32m1_t(vfloat32m1_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
static INLINE vfloat64m1_t setvfloat64m1_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m1(a, VECTLENDP); }
static INLINE double getvfloat64m1_t(vfloat64m1_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
static vfloat32m1_t vf2getx_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 0); }
static vfloat32m1_t vf2gety_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 1); }
static vfloat64m1_t vd2getx_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 0); }
static vfloat64m1_t vd2gety_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 1); }
#elif defined(ENABLE_RVVM2)
#define VECTLENSP (2 * __riscv_vlenb() / sizeof(float))
#define VECTLENDP (2 * __riscv_vlenb() / sizeof(double))
static INLINE vfloat32m2_t setvfloat32m2_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m2(a, VECTLENSP); }
static INLINE float getvfloat32m2_t(vfloat32m2_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
static INLINE vfloat64m2_t setvfloat64m2_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m2(a, VECTLENDP); }
static INLINE double getvfloat64m2_t(vfloat64m2_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
static vfloat32m2_t vf2getx_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 0); }
static vfloat32m2_t vf2gety_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 1); }
static vfloat64m2_t vd2getx_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 0); }
static vfloat64m2_t vd2gety_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 1); }
#else
#error "unknown RVV"
#endif
#undef VECTLENSP
#undef VECTLENDP
#endif

//

// ATR = cinz_, NAME = sin, TYPE = d2, ULP = u35, EXT = sse2
#define FUNC(ATR, NAME, TYPE, ULP, EXT) Sleef_ ## ATR ## NAME ## TYPE ## _ ## ULP ## EXT
#define _TYPE2(TYPE) Sleef_ ## TYPE ## _2
#define TYPE2(TYPE) _TYPE2(TYPE)
#define SET(TYPE) set ## TYPE
#define GET(TYPE) get ## TYPE

#if !defined(__ARM_FEATURE_SVE) && !(defined(__riscv) && defined(__riscv_v))
static DPTYPE vd2getx_vd_vd2(TYPE2(DPTYPE) v) { return v.x; }
static DPTYPE vd2gety_vd_vd2(TYPE2(DPTYPE) v) { return v.y; }
static SPTYPE vf2getx_vf_vf2(TYPE2(SPTYPE) v) { return v.x; }
static SPTYPE vf2gety_vf_vf2(TYPE2(SPTYPE) v) { return v.y; }
#endif
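
// Illustrative expansions (a sketch; the actual ATR, NAME, TYPE, ULP and
// EXT tokens are supplied per target by the build system).  With the
// sample parameters from the comment above:
//   FUNC(cinz_, sin, d2, u35, sse2)  ->  Sleef_cinz_sind2_u35sse2
//   TYPE2(__m128d)                   ->  Sleef___m128d_2
//   SET(__m128d)                     ->  set__m128d   (defined earlier)
//   GET(__m128d)                     ->  get__m128d   (defined earlier)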
//

#define initDigest \
  EVP_MD_CTX *ctx; ctx = EVP_MD_CTX_new(); \
  if (!ctx) { \
    fprintf(stderr, "Error creating context.\n"); \
    return 0; \
  } \
  if (!EVP_DigestInit_ex(ctx, EVP_md5(), NULL)) { \
    fprintf(stderr, "Error initializing context.\n"); \
    return 0; \
  }

#define checkDigest(NAME, ULP) do { \
    unsigned int md5_digest_len = EVP_MD_size(EVP_md5()); \
    unsigned char *md5_digest; \
    md5_digest = (unsigned char *)malloc(md5_digest_len); \
    if (!EVP_DigestFinal_ex(ctx, md5_digest, &md5_digest_len)) { \
      fprintf(stderr, "Error finalizing digest.\n"); \
      return 0; \
    } \
    EVP_MD_CTX_free(ctx); \
    unsigned char mes[64], buf[64]; \
    memset(mes, 0, 64); \
    sprintf((char *)mes, "%s ", #NAME " " #ULP); \
    char tmp[3] = { 0 }; \
    for (int i = 0; i < md5_digest_len; i++) { \
      sprintf(tmp, "%02x", md5_digest[i]); \
      strcat((char *)mes, tmp); \
    } \
    free(md5_digest); \
    if (fp != NULL) { \
      fgets((char *)buf, 60, fp); \
      if (strncmp((char *)mes, (char *)buf, strlen((char *)mes)) != 0) { \
        puts((char *)mes); \
        puts((char *)buf); \
        success = 0; \
      } \
    } else puts((char *)mes); \
  } while(0)

#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
// Reverse the byte order in place so that big- and little-endian hosts
// feed identical byte sequences to the digest.
#define convertEndianness(ptr, len) do { \
    for(int k=0;k<(len)/2;k++) { \
      unsigned char t = ((unsigned char *)(ptr))[k]; \
      ((unsigned char *)(ptr))[k] = ((unsigned char *)(ptr))[(len)-1-k]; \
      ((unsigned char *)(ptr))[(len)-1-k] = t; \
    } \
  } while(0)
#else
#define convertEndianness(ptr, len)
#endif
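
#if 0
/* A minimal usage sketch, kept out of the build: it shows how the SET/GET
   helpers, convertEndianness and the digest macros are meant to combine.
   The argument generator, the iteration count and the tested function
   (Sleef_cinz_sind2_u35sse2, i.e. FUNC(cinz_, sin, d2, u35, sse2) on an
   SSE2 target) are assumptions made for this example only. */
static int exampleDigestLoop(FILE *fp) {
  int success = 1;
  initDigest;                                       // declares and initializes ctx
  for(int i = 0; i < 1000; i++) {
    double x = (double)(i - 500) / 10;              // hypothetical test argument
    __m128d vx = set__m128d(x, i);                  // put x in one lane, randomize the rest
    double r = get__m128d(Sleef_cinz_sind2_u35sse2(vx), i); // read the same lane back
    convertEndianness(&r, sizeof(r));               // canonical byte order for hashing
    EVP_DigestUpdate(ctx, &r, sizeof(r));           // fold the result into the MD5 state
  }
  checkDigest(sin, u35);                            // print or compare "sin u35 <md5>"
  return success;
}
#endif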