// Copyright Naoki Shibata and contributors 2010 - 2021. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include "misc.h" #ifndef CONFIG #error CONFIG macro not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENDP CONFIG //@#define LOG2VECTLENDP CONFIG #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP (LOG2VECTLENDP+1) //@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT //@#define ACCURATE_SQRT #define DFTPRIORITY LOG2VECTLENDP #define ISANAME "Pure C Array" typedef union { uint32_t u[VECTLENDP*2]; uint64_t x[VECTLENDP]; double d[VECTLENDP]; float f[VECTLENDP*2]; int32_t i[VECTLENDP*2]; } versatileVector; typedef versatileVector vmask; typedef versatileVector vopmask; typedef versatileVector vdouble; typedef versatileVector vint; typedef versatileVector vfloat; typedef versatileVector vint2; typedef union { uint8_t u[sizeof(long double)*VECTLENDP]; long double ld[VECTLENDP]; } longdoubleVector; typedef longdoubleVector vmaskl; typedef longdoubleVector vlongdouble; #if defined(Sleef_quad2_DEFINED) && defined(ENABLEFLOAT128) typedef union { uint8_t u[sizeof(Sleef_quad)*VECTLENDP]; Sleef_quad q[VECTLENDP]; } quadVector; typedef quadVector vmaskq; typedef quadVector vquad; #endif // static INLINE int vavailability_i(int name) { return -1; } static INLINE void vprefetch_v_p(const void *ptr) { } static INLINE int vtestallones_i_vo64(vopmask g) { int ret = 1; for(int i=0;i 0 ? (int)(vd.d[i] + 0.5) : (int)(vd.d[i] - 0.5); return ret; } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return vcast_vd_vi(vrint_vi_vd(vd)); } static INLINE vint vcast_vi_i(int j) { vint ret; for(int i=0;i y.d[i] ? x.d[i] : y.d[i]; return ret; } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { vdouble ret; for(int i=0;i y.d[i] ? -1 : 0; return ret; } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { vopmask ret; for(int i=0;i= y.d[i] ? -1 : 0; return ret; } static INLINE vint vadd_vi_vi_vi(vint x, vint y) { vint ret; for(int i=0;i> c; return ret; } static INLINE vint vsra_vi_vi_i(vint x, int c) { vint ret; for(int i=0;i> c; return ret; } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { vopmask ret; for(int i=0;i y.i[i] ? -1 : 0; return ret; } static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { union { vopmask vo; vint2 vi2; } cnv; cnv.vo = m; return vor_vi_vi_vi(vand_vi_vi_vi(vreinterpretFirstHalf_vi_vi2(cnv.vi2), x), vandnot_vi_vi_vi(vreinterpretFirstHalf_vi_vi2(cnv.vi2), y)); } static INLINE vopmask visinf_vo_vd(vdouble d) { vopmask ret; for(int i=0;i 0 ? (int)(vf.f[i] + 0.5) : (int)(vf.f[i] - 0.5); return ret; } static INLINE vint2 vcast_vi2_i(int j) { vint2 ret; for(int i=0;i y.f[i] ? x.f[i] : y.f[i]; return ret; } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { vfloat ret; for(int i=0;i y.f[i]) ? -1 : 0); return ret; } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { vopmask ret; for(int i=0;i= y.f[i]) ? -1 : 0); return ret; } static INLINE vint vadd_vi2_vi2_vi2(vint x, vint y) { vint ret; for(int i=0;i> c; return ret; } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { vint2 ret; for(int i=0;i> c; return ret; } static INLINE vopmask visinf_vo_vf (vfloat d) { vopmask ret; for(int i=0;i y.i[i] ? -1 : 0; return ret; } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { vopmask ret; for(int i=0;i y.i[i] ? -1 : 0; return ret; } static INLINE vfloat vsqrt_vf_vf(vfloat x) { vfloat ret; for(int i=0;i