kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 3945a8f00b5062cc698afda300a07fddcdf3ba3a
parent 6ff1acd1d2216a0655755c48a805cb14c130c150
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Mon, 25 Jul 2016 13:28:15 +0300

Refactoring: Improve compilation time

Diffstat:
M examples/biquads.cpp | 2 --
M examples/dft.cpp | 5 ++---
M examples/fir.cpp | 2 --
M examples/resampling.cpp | 5 -----
M examples/window.cpp | 2 --
M include/kfr/base/abs.hpp | 128 +++++++++++++++++++++++++------------------------------------------------------
M include/kfr/base/asin_acos.hpp | 66 +++++++++++++++++++++---------------------------------------------
M include/kfr/base/atan.hpp | 407 +++++++++++++++++++++++++++++++++++++++----------------------------------------
A include/kfr/base/clamp.hpp | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/kfr/base/complex.hpp | 455 ++++++++++++++++++++++++++++++++++---------------------------------------------
M include/kfr/base/gamma.hpp | 66 +++++++++++++++++++++++++++---------------------------------------
M include/kfr/base/hyperbolic.hpp | 178 ++++++++++++++++++++++++++++++++++++-------------------------------------------
M include/kfr/base/log_exp.hpp | 768 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M include/kfr/base/logical.hpp | 466 +++++++++++++++++++++++++++++--------------------------------------------------
M include/kfr/base/min_max.hpp | 436 +++++++++++++++++++++----------------------------------------------------------
A include/kfr/base/modzerobessel.hpp | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/kfr/base/operators.hpp | 36 ++++++++++++++++++++++++++++++++++++
M include/kfr/base/round.hpp | 377 ++++++++++++++++++++++++++++++++-----------------------------------------------
M include/kfr/base/saturation.hpp | 246 +++++++++++++++++++++++++++++++------------------------------------------------
M include/kfr/base/select.hpp | 205 ++++++++++++++++++++-----------------------------------------------------------
M include/kfr/base/sin_cos.hpp | 685 +++++++++++++++++++++++++++++++++----------------------------------------------
M include/kfr/base/sqrt.hpp | 79 ++++++++++++++++++++++---------------------------------------------------------
M include/kfr/base/tan.hpp | 227 +++++++++++++++++++++++++++++++++++--------------------------------------------
M include/kfr/dft/conv.hpp | 1 -
M include/kfr/dft/fft.hpp | 4 ++--
M include/kfr/dft/ft.hpp | 2 +-
M include/kfr/dsp/fir.hpp | 115 +++++++++++++++++++++++++++++++++----------------------------------------------
M include/kfr/dsp/fir_design.hpp | 256 +++++++++++++++++++++++++------------------------------------------------------
M include/kfr/dsp/fracdelay.hpp | 8 ++------
M include/kfr/dsp/oscillators.hpp | 300 +++++++++++++++++++++++++++++++++----------------------------------------------
M include/kfr/dsp/resample.hpp | 303 ++++++++++++++++++++++++++++++++++++-------------------------------------------
M include/kfr/dsp/units.hpp | 239 ++++++++++++++++++++++++++++++++++++-------------------------------------------
M include/kfr/dsp/weighting.hpp | 139 +++++++++++++++++++++++++++++++++++++------------------------------------------
M include/kfr/dsp/window.hpp | 901 ++++++++++++++++++++++++++++++++++++-------------------------------------------
M include/kfr/expressions/reduce.hpp | 228 +++++++++++++++++++++++++++-----------------------------------------------------
M include/kfr/math.hpp | 4 ----
M sources.cmake | 3 ++-
M tests/basic_vector_test.cpp | 119 +++++++++++++++++++++++++------------------------------------------------------
M tests/complex_test.cpp | 5 ++++-
M tests/conv_test.cpp | 2 +-
M tests/dft_test.cpp | 1 -
M tests/empty_test.cpp | 2 --
M tests/fracdelay_test.cpp | 16 ++++++++--------
M tests/stat_test.cpp | 40 ++++++++++++++++++++--------------------
44 files changed, 3271 insertions(+), 4444 deletions(-)

diff --git a/examples/biquads.cpp b/examples/biquads.cpp @@ -25,13 +25,11 @@ #include <kfr/io/python_plot.hpp> using namespace kfr; -using namespace kfr::native; int main(int argc, char** argv) { println(library_version()); - using namespace native; const std::string options = "phaseresp=True"; univector<double, 128> output; diff --git a/examples/dft.cpp b/examples/dft.cpp @@ -17,7 +17,6 @@ #include <kfr/dsp/oscillators.hpp> #include <kfr/dsp/units.hpp> #include <kfr/expressions/basic.hpp> -#include <kfr/expressions/operators.hpp> #include <kfr/expressions/reduce.hpp> #include <kfr/math.hpp> #include <kfr/misc/random.hpp> @@ -52,8 +51,8 @@ int main(int argc, char** argv) // get magnitude and convert to decibels univector<float_type, size> dB = amp_to_dB(cabs(out)); - println("max = ", max(dB)); - println("min = ", min(dB)); + println("max = ", maxof(dB)); + println("min = ", minof(dB)); println("mean = ", mean(dB)); println("rms = ", rms(dB)); diff --git a/examples/fir.cpp b/examples/fir.cpp @@ -30,13 +30,11 @@ #include <iostream> using namespace kfr; -using namespace kfr::native; int main(int argc, char** argv) { println(library_version()); - using namespace native; const std::string options = "phaseresp=False"; univector<double, 15> taps15; diff --git a/examples/resampling.cpp b/examples/resampling.cpp @@ -21,16 +21,12 @@ // swept #include <kfr/dsp/oscillators.hpp> -// operator overloading for expressions -#include <kfr/expressions/operators.hpp> - // plot_save() #include <kfr/io/python_plot.hpp> #include <iostream> using namespace kfr; -using namespace kfr::native; constexpr size_t input_sr = 96000; constexpr size_t output_sr = 44100; @@ -41,7 +37,6 @@ int main(int argc, char** argv) { println(library_version()); - using namespace native; const std::string options = "phaseresp=False"; univector<f64> swept_sine = swept(0.5, len); diff --git a/examples/window.cpp b/examples/window.cpp @@ -22,13 +22,11 @@ #include <kfr/io/python_plot.hpp> using namespace kfr; 
-using namespace kfr::native; int main(int argc, char** argv) { println(library_version()); - using namespace native; const std::string options = "freqresp=True, dots=True, padwidth=1024, " "log_freq=False, horizontal=False, normalized_freq=True"; diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp @@ -26,114 +26,68 @@ #include "operators.hpp" #include "select.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { namespace internal { - -template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu> -struct in_abs : in_abs<older(cpu), cc> +// floating point +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> abs(vec<T, N> x) { - struct fn_abs : in_abs<older(cpu), cc>::fn_abs, fn_disabled - { - }; -}; - -template <cpu_t cc> -struct in_abs<cpu_t::common, cc> : in_select<cc> -{ - constexpr static cpu_t cpu = cpu_t::common; - -private: - using in_select<cc>::select; - -public: - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> abs(vec<T, N> value) - { - return select(value >= T(), value, -value); - } - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> abs(vec<T, N> value) - { - return value & invhighbitmask<T>; - } - - KFR_HANDLE_SCALAR(abs) - KFR_SPEC_FN(in_abs, abs) -}; + return x & internal::invhighbitmask<T>; +} -#ifdef CID_ARCH_X86 +#if defined CID_ARCH_SSSE3 -template <cpu_t cc> -struct in_abs<cpu_t::ssse3, cc> : in_select<cc> +template <typename T, size_t N> +KFR_SINTRIN vec<i64, N> abs(vec<i64, N> x) { - constexpr static cpu_t cpu = cpu_t::ssse3; - -private: - using in_select<cc>::select; - -public: - template <size_t N> - KFR_SINTRIN vec<i64, N> abs(vec<i64, N> value) - { - return select(value >= 0, value, -value); - } - - KFR_CPU_INTRIN(ssse3) i32sse abs(i32sse value) { return _mm_abs_epi32(*value); } - KFR_CPU_INTRIN(ssse3) 
i16sse abs(i16sse value) { return _mm_abs_epi16(*value); } - KFR_CPU_INTRIN(ssse3) i8sse abs(i8sse value) { return _mm_abs_epi8(*value); } - - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> abs(vec<T, N> value) - { - return value & invhighbitmask<T>; - } - - KFR_HANDLE_ALL(abs) - KFR_HANDLE_SCALAR(abs) - KFR_SPEC_FN(in_abs, abs) -}; + return select(x >= T(), x, -x); +} +KFR_SINTRIN i32sse abs(i32sse value) { return _mm_abs_epi32(*value); } +KFR_SINTRIN i16sse abs(i16sse value) { return _mm_abs_epi16(*value); } +KFR_SINTRIN i8sse abs(i8sse value) { return _mm_abs_epi8(*value); } + +#if defined CID_ARCH_AVX2 +KFR_SINTRIN i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); } +KFR_SINTRIN i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); } +KFR_SINTRIN i8avx abs(i8avx value) { return _mm256_abs_epi8(*value); } +#endif -template <cpu_t cc> -struct in_abs<cpu_t::avx2, cc> : in_abs<cpu_t::ssse3, cc> +template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && !is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> abs(vec<T, N> a) { - constexpr static cpu_t cpu = cpu_t::avx2; - using in_abs<cpu_t::ssse3, cc>::abs; + return slice<0, N>(abs(expand_simd(a))); +} +template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && !is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> abs(vec<T, N> a) +{ + return concat(abs(low(a)), abs(high(a))); +} - KFR_CPU_INTRIN(avx2) i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); } - KFR_CPU_INTRIN(avx2) i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); } - KFR_CPU_INTRIN(avx2) i8avx abs(i8avx value) { return _mm256_abs_epi8(*value); } +#else - KFR_HANDLE_ALL(abs) - KFR_HANDLE_SCALAR(abs) - KFR_SPEC_FN(in_abs, abs) -}; +// fallback +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> abs(vec<T, N> x) +{ + return select(value >= T(), value, -value); +} #endif +KFR_HANDLE_SCALAR_1(abs) 
+KFR_FN(abs) } -namespace native -{ -using fn_abs = internal::in_abs<>::fn_abs; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> abs(const T1& x) +KFR_INTRIN T1 abs(const T1& x) { - return internal::in_abs<>::abs(x); + return internal::abs(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> - -KFR_INTRIN expr_func<fn_abs, E1> abs(E1&& x) +KFR_INTRIN expr_func<internal::fn_abs, E1> abs(E1&& x) { - return { fn_abs(), std::forward<E1>(x) }; -} + return { {}, std::forward<E1>(x) }; } } - -#pragma clang diagnostic pop diff --git a/include/kfr/base/asin_acos.hpp b/include/kfr/base/asin_acos.hpp @@ -22,79 +22,55 @@ */ #pragma once -#include "abs.hpp" #include "atan.hpp" -#include "constants.hpp" #include "function.hpp" -#include "min_max.hpp" -#include "operators.hpp" #include "select.hpp" -#include "shuffle.hpp" #include "sqrt.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { namespace internal { -template <cpu_t cpu = cpu_t::native> -struct in_asin_acos : private in_select<cpu>, private in_atan<cpu>, private in_sqrt<cpu> +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> asin(vec<T, N> x) { -private: - using in_atan<cpu>::atan2; - using in_sqrt<cpu>::sqrt; - -public: - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> asin(vec<T, N> x) - { - return atan2(x, sqrt(T(1) - x * x)); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> acos(vec<T, N> x) - { - return atan2(sqrt(T(1) - x * x), x); - } - KFR_SPEC_FN(in_asin_acos, asin) - KFR_SPEC_FN(in_asin_acos, acos) -}; + return atan2(x, sqrt(T(1) - x * x)); } -namespace native +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> acos(vec<T, N> x) { -using fn_asin = internal::in_asin_acos<>::fn_asin; + return atan2(sqrt(T(1) - x * x), x); +} +KFR_HANDLE_SCALAR(asin) +KFR_HANDLE_SCALAR(acos) +KFR_FN(asin) +KFR_FN(acos) +} + template 
<typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> asin(const T1& x) +KFR_INTRIN T1 asin(const T1& x) { - return internal::in_asin_acos<>::asin(x); + return internal::asin(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_asin, E1> asin(E1&& x) +KFR_INTRIN expr_func<internal::fn_asin, E1> asin(E1&& x) { - return { fn_asin(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_acos = internal::in_asin_acos<>::fn_acos; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> acos(const T1& x) +KFR_INTRIN T1 acos(const T1& x) { - return internal::in_asin_acos<>::acos(x); + return internal::acos(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_acos, E1> acos(E1&& x) +KFR_INTRIN expr_func<internal::fn_acos, E1> acos(E1&& x) { - return { fn_acos(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } } -} - -#pragma clang diagnostic pop diff --git a/include/kfr/base/atan.hpp b/include/kfr/base/atan.hpp @@ -28,240 +28,231 @@ #include "select.hpp" #include "sin_cos.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { namespace internal { -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_atan : in_trig<cc>, in_select<cc>, in_round<cc>, in_abs<cc> +template <size_t N> +KFR_SINTRIN vec<f32, N> atan2k(vec<f32, N> y, vec<f32, N> x) { -private: - using in_abs<cc>::abs; - using in_round<cc>::floor; - using in_select<cc>::select; - using in_trig<cc>::mask_horner; - using in_select<cc>::sign; + vec<f32, N> s, t, u; + vec<i32, N> q; + q = select(x < 0, -2, 0); + x = select(x < 0, -x, x); + mask<i32, N> m; + m = y > x; + t = x; + x = select(m, y, x); + y = select(m, -t, y); + q = select(m, q + 1, q); + s = y / x; + t = s * s; + u = 0.00282363896258175373077393f; + u = fmadd(u, t, 
-0.0159569028764963150024414f); + u = fmadd(u, t, 0.0425049886107444763183594f); + u = fmadd(u, t, -0.0748900920152664184570312f); + u = fmadd(u, t, 0.106347933411598205566406f); + u = fmadd(u, t, -0.142027363181114196777344f); + u = fmadd(u, t, 0.199926957488059997558594f); + u = fmadd(u, t, -0.333331018686294555664062f); + t = u * t * s + s; + t = cast<f32>(q) * 1.5707963267948966192313216916398f + t; + return t; +} -public: - template <size_t N> - KFR_SINTRIN vec<f32, N> atan2k(vec<f32, N> y, vec<f32, N> x) - { - vec<f32, N> s, t, u; - vec<i32, N> q; - q = select(x < 0, -2, 0); - x = select(x < 0, -x, x); - mask<i32, N> m; - m = y > x; - t = x; - x = select(m, y, x); - y = select(m, -t, y); - q = select(m, q + 1, q); - s = y / x; - t = s * s; - u = 0.00282363896258175373077393f; - u = fmadd(u, t, -0.0159569028764963150024414f); - u = fmadd(u, t, 0.0425049886107444763183594f); - u = fmadd(u, t, -0.0748900920152664184570312f); - u = fmadd(u, t, 0.106347933411598205566406f); - u = fmadd(u, t, -0.142027363181114196777344f); - u = fmadd(u, t, 0.199926957488059997558594f); - u = fmadd(u, t, -0.333331018686294555664062f); - t = u * t * s + s; - t = cast<f32>(q) * 1.5707963267948966192313216916398f + t; - return t; - } - template <size_t N> - KFR_SINTRIN vec<f64, N> atan2k(vec<f64, N> y, vec<f64, N> x) - { - vec<f64, N> s, t, u; - vec<i64, N> q; - q = select(x < 0, -2ll, 0ll); - x = select(x < 0, -x, x); - vec<i64, N> m; - m = y > x; - t = x; - x = select(m, y, x); - y = select(m, -t, y); - q = select(m, q + 1ll, q); - s = y / x; - t = s * s; - u = -1.88796008463073496563746e-05; - u = fmadd(u, t, 0.000209850076645816976906797); - u = fmadd(u, t, -0.00110611831486672482563471); - u = fmadd(u, t, 0.00370026744188713119232403); - u = fmadd(u, t, -0.00889896195887655491740809); - u = fmadd(u, t, 0.016599329773529201970117); - u = fmadd(u, t, -0.0254517624932312641616861); - u = fmadd(u, t, 0.0337852580001353069993897); - u = fmadd(u, t, -0.0407629191276836500001934); - u = 
fmadd(u, t, 0.0466667150077840625632675); - u = fmadd(u, t, -0.0523674852303482457616113); - u = fmadd(u, t, 0.0587666392926673580854313); - u = fmadd(u, t, -0.0666573579361080525984562); - u = fmadd(u, t, 0.0769219538311769618355029); - u = fmadd(u, t, -0.090908995008245008229153); - u = fmadd(u, t, 0.111111105648261418443745); - u = fmadd(u, t, -0.14285714266771329383765); - u = fmadd(u, t, 0.199999999996591265594148); - u = fmadd(u, t, -0.333333333333311110369124); - t = u * t * s + s; - t = cast<f64>(q) * 1.5707963267948966192313216916398 + t; - return t; - } - template <size_t N> - KFR_SINTRIN vec<f32, N> atan2(vec<f32, N> y, vec<f32, N> x) - { - vec<f32, N> r = atan2k(abs(y), x); - constexpr f32 pi = 3.1415926535897932384626433832795f; - constexpr f32 pi_over_2 = 1.5707963267948966192313216916398f; - constexpr f32 pi_over_4 = 0.78539816339744830961566084581988f; - r = mulsign(r, x); - r = select(isinf(x) || x == 0.0f, pi_over_2 - select(x.asmask(), mulsign(pi_over_2, x), 0.0f), r); - r = select(isinf(y), pi_over_2 - select(x.asmask(), mulsign(pi_over_4, x), 0.0f), r); - r = select(y == 0.0f, fbitcast(ibitcast(sign(x) == -1.0f) & ibitcast(pi)), r); - r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y))); - return r; - } - template <size_t N> - KFR_SINTRIN vec<f64, N> atan2(vec<f64, N> y, vec<f64, N> x) - { - vec<f64, N> r = atan2k(abs(y), x); - constexpr f64 pi = 3.1415926535897932384626433832795; - constexpr f64 pi_over_2 = 1.5707963267948966192313216916398; - constexpr f64 pi_over_4 = 0.78539816339744830961566084581988; - r = mulsign(r, x); - r = select(isinf(x) || x == 0.0, pi_over_2 - select(x, mulsign(pi_over_2, x), 0.0), r); - r = select(isinf(y), pi_over_2 - select(x, mulsign(pi_over_4, x), 0.0), r); - r = select(y == 0.0, fbitcast(ibitcast(sign(x) == -1.0) & ibitcast(pi)), r); - r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y))); - return r; - } - template <size_t N> - KFR_SINTRIN vec<f32, N> atan(vec<f32, N> s) - { 
- vec<f32, N> t, u; - vec<i32, N> q; - q = select(s < 0.f, 2, 0); - s = select(s < 0.f, -s, s); - q = select(s > 1.f, q | 1, q); - s = select(s > 1.f, 1.0f / s, s); - t = s * s; - u = 0.00282363896258175373077393f; - u = fmadd(u, t, -0.0159569028764963150024414f); - u = fmadd(u, t, 0.0425049886107444763183594f); - u = fmadd(u, t, -0.0748900920152664184570312f); - u = fmadd(u, t, 0.106347933411598205566406f); - u = fmadd(u, t, -0.142027363181114196777344f); - u = fmadd(u, t, 0.199926957488059997558594f); - u = fmadd(u, t, -0.333331018686294555664062f); - t = s + s * (t * u); - t = select((q & 1) != 0, 1.570796326794896557998982f - t, t); - t = select((q & 2) != 0, -t, t); - return t; - } - template <size_t N> - KFR_SINTRIN vec<f64, N> atan(vec<f64, N> s) - { - vec<f64, N> t, u; - vec<i64, N> q; - q = select(s < 0.0, 2ll, 0ll); - s = select(s < 0.0, -s, s); - q = select(s > 1.0, q | 1, q); - s = select(s > 1.0, 1.0 / s, s); - t = s * s; - u = -1.88796008463073496563746e-05; - u = fmadd(u, t, 0.000209850076645816976906797); - u = fmadd(u, t, -0.00110611831486672482563471); - u = fmadd(u, t, 0.00370026744188713119232403); - u = fmadd(u, t, -0.00889896195887655491740809); - u = fmadd(u, t, 0.016599329773529201970117); - u = fmadd(u, t, -0.0254517624932312641616861); - u = fmadd(u, t, 0.0337852580001353069993897); - u = fmadd(u, t, -0.0407629191276836500001934); - u = fmadd(u, t, 0.0466667150077840625632675); - u = fmadd(u, t, -0.0523674852303482457616113); - u = fmadd(u, t, 0.0587666392926673580854313); - u = fmadd(u, t, -0.0666573579361080525984562); - u = fmadd(u, t, 0.0769219538311769618355029); - u = fmadd(u, t, -0.090908995008245008229153); - u = fmadd(u, t, 0.111111105648261418443745); - u = fmadd(u, t, -0.14285714266771329383765); - u = fmadd(u, t, 0.199999999996591265594148); - u = fmadd(u, t, -0.333333333333311110369124); - t = s + s * (t * u); - t = select((q & 1) != 0, 1.570796326794896557998982 - t, t); - t = select((q & 2) != 0, -t, t); - return t; - } - 
template <typename T> - KFR_SINTRIN T atandeg(const T& x) - { - return atan(x) * c_radtodeg<T>; - } - template <typename T1, typename T2> - KFR_SINTRIN common_type<T1, T2> atan2deg(const T1& y, const T2& x) - { - return atan2(y, x) * c_radtodeg<common_type<T1, T2>>; - } - KFR_HANDLE_SCALAR(atan) - KFR_HANDLE_SCALAR(atan2) - KFR_SPEC_FN(in_atan, atan) - KFR_SPEC_FN(in_atan, atandeg) - KFR_SPEC_FN(in_atan, atan2) - KFR_SPEC_FN(in_atan, atan2deg) -}; +template <size_t N> +KFR_SINTRIN vec<f64, N> atan2k(vec<f64, N> y, vec<f64, N> x) +{ + vec<f64, N> s, t, u; + vec<i64, N> q; + q = select(x < 0, -2ll, 0ll); + x = select(x < 0, -x, x); + vec<i64, N> m; + m = y > x; + t = x; + x = select(m, y, x); + y = select(m, -t, y); + q = select(m, q + 1ll, q); + s = y / x; + t = s * s; + u = -1.88796008463073496563746e-05; + u = fmadd(u, t, 0.000209850076645816976906797); + u = fmadd(u, t, -0.00110611831486672482563471); + u = fmadd(u, t, 0.00370026744188713119232403); + u = fmadd(u, t, -0.00889896195887655491740809); + u = fmadd(u, t, 0.016599329773529201970117); + u = fmadd(u, t, -0.0254517624932312641616861); + u = fmadd(u, t, 0.0337852580001353069993897); + u = fmadd(u, t, -0.0407629191276836500001934); + u = fmadd(u, t, 0.0466667150077840625632675); + u = fmadd(u, t, -0.0523674852303482457616113); + u = fmadd(u, t, 0.0587666392926673580854313); + u = fmadd(u, t, -0.0666573579361080525984562); + u = fmadd(u, t, 0.0769219538311769618355029); + u = fmadd(u, t, -0.090908995008245008229153); + u = fmadd(u, t, 0.111111105648261418443745); + u = fmadd(u, t, -0.14285714266771329383765); + u = fmadd(u, t, 0.199999999996591265594148); + u = fmadd(u, t, -0.333333333333311110369124); + t = u * t * s + s; + t = cast<f64>(q) * 1.5707963267948966192313216916398 + t; + return t; } -namespace native + +template <size_t N> +KFR_SINTRIN vec<f32, N> atan2(vec<f32, N> y, vec<f32, N> x) { -using fn_atan = internal::in_atan<>::fn_atan; -template <typename T1, typename T2, 
KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> atan(const T1& y, const T2& x) + vec<f32, N> r = atan2k(abs(y), x); + constexpr f32 pi = 3.1415926535897932384626433832795f; + constexpr f32 pi_over_2 = 1.5707963267948966192313216916398f; + constexpr f32 pi_over_4 = 0.78539816339744830961566084581988f; + r = mulsign(r, x); + r = select(isinf(x) || x == 0.0f, pi_over_2 - select(x.asmask(), mulsign(pi_over_2, x), 0.0f), r); + r = select(isinf(y), pi_over_2 - select(x.asmask(), mulsign(pi_over_4, x), 0.0f), r); + r = select(y == 0.0f, fbitcast(ibitcast(x < 0) & ibitcast(pi)), r); + r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y))); + return r; +} + +template <size_t N> +KFR_SINTRIN vec<f64, N> atan2(vec<f64, N> y, vec<f64, N> x) { - return internal::in_atan<>::atan(y, x); + vec<f64, N> r = atan2k(abs(y), x); + constexpr f64 pi = 3.1415926535897932384626433832795; + constexpr f64 pi_over_2 = 1.5707963267948966192313216916398; + constexpr f64 pi_over_4 = 0.78539816339744830961566084581988; + r = mulsign(r, x); + r = select(isinf(x) || x == 0.0, pi_over_2 - select(x, mulsign(pi_over_2, x), 0.0), r); + r = select(isinf(y), pi_over_2 - select(x, mulsign(pi_over_4, x), 0.0), r); + r = select(y == 0.0, fbitcast(ibitcast(x < 0) & ibitcast(pi)), r); + r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y))); + return r; } -template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_atan, E1, E2> atan(E1&& y, E2&& x) + +template <size_t N> +KFR_SINTRIN vec<f32, N> atan(vec<f32, N> s) { - return { fn_atan(), std::forward<E1>(y), std::forward<E2>(x) }; + vec<f32, N> t, u; + vec<i32, N> q; + q = select(s < 0.f, 2, 0); + s = select(s < 0.f, -s, s); + q = select(s > 1.f, q | 1, q); + s = select(s > 1.f, 1.0f / s, s); + t = s * s; + u = 0.00282363896258175373077393f; + u = fmadd(u, t, -0.0159569028764963150024414f); + u = fmadd(u, t, 
0.0425049886107444763183594f); + u = fmadd(u, t, -0.0748900920152664184570312f); + u = fmadd(u, t, 0.106347933411598205566406f); + u = fmadd(u, t, -0.142027363181114196777344f); + u = fmadd(u, t, 0.199926957488059997558594f); + u = fmadd(u, t, -0.333331018686294555664062f); + t = s + s * (t * u); + t = select((q & 1) != 0, 1.570796326794896557998982f - t, t); + t = select((q & 2) != 0, -t, t); + return t; } -using fn_atan2 = internal::in_atan<>::fn_atan2; -template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> atan2(const T1& y, const T2& x) + +template <size_t N> +KFR_SINTRIN vec<f64, N> atan(vec<f64, N> s) { - return internal::in_atan<>::atan2(y, x); + vec<f64, N> t, u; + vec<i64, N> q; + q = select(s < 0.0, 2ll, 0ll); + s = select(s < 0.0, -s, s); + q = select(s > 1.0, q | 1, q); + s = select(s > 1.0, 1.0 / s, s); + t = s * s; + u = -1.88796008463073496563746e-05; + u = fmadd(u, t, 0.000209850076645816976906797); + u = fmadd(u, t, -0.00110611831486672482563471); + u = fmadd(u, t, 0.00370026744188713119232403); + u = fmadd(u, t, -0.00889896195887655491740809); + u = fmadd(u, t, 0.016599329773529201970117); + u = fmadd(u, t, -0.0254517624932312641616861); + u = fmadd(u, t, 0.0337852580001353069993897); + u = fmadd(u, t, -0.0407629191276836500001934); + u = fmadd(u, t, 0.0466667150077840625632675); + u = fmadd(u, t, -0.0523674852303482457616113); + u = fmadd(u, t, 0.0587666392926673580854313); + u = fmadd(u, t, -0.0666573579361080525984562); + u = fmadd(u, t, 0.0769219538311769618355029); + u = fmadd(u, t, -0.090908995008245008229153); + u = fmadd(u, t, 0.111111105648261418443745); + u = fmadd(u, t, -0.14285714266771329383765); + u = fmadd(u, t, 0.199999999996591265594148); + u = fmadd(u, t, -0.333333333333311110369124); + t = s + s * (t * u); + t = select((q & 1) != 0, 1.570796326794896557998982 - t, t); + t = select((q & 2) != 0, -t, t); + return t; } -template <typename E1, typename E2, 
KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_atan2, E1, E2> atan2(E1&& y, E2&& x) + +template <typename T> +KFR_SINTRIN T atandeg(const T& x) { - return { fn_atan2(), std::forward<E1>(y), std::forward<E2>(x) }; + return atan(x) * c_radtodeg<T>; +} + +template <typename T1, typename T2> +KFR_SINTRIN common_type<T1, T2> atan2deg(const T1& y, const T2& x) +{ + return atan2(y, x) * c_radtodeg<common_type<T1, T2>>; +} + +KFR_HANDLE_SCALAR(atan) +KFR_HANDLE_SCALAR(atan2) +KFR_FN(atan) +KFR_FN(atandeg) +KFR_FN(atan2) +KFR_FN(atan2deg) } -using fn_atandeg = internal::in_atan<>::fn_atandeg; + +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> atan(const T1& x) +{ + return internal::atan(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<internal::fn_atan, E1> atan(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} + +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> atandeg(const T1& x) +{ + return internal::atandeg(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<internal::fn_atandeg, E1> atandeg(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} + template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> atandeg(const T1& y, const T2& x) +KFR_INTRIN common_type<T1, T2> atan2(const T1& x, const T2& y) { - return internal::in_atan<>::atandeg(y, x); + return internal::atan2(x, y); } + template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_atandeg, E1, E2> atandeg(E1&& y, E2&& x) +KFR_INTRIN expr_func<internal::fn_atan2, E1, E2> atan2(E1&& x, E2&& y) { - return { fn_atandeg(), std::forward<E1>(y), std::forward<E2>(x) }; + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } -using fn_atan2deg = internal::in_atan<>::fn_atan2deg; + template <typename 
T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> atan2deg(const T1& y, const T2& x) +KFR_INTRIN common_type<T1, T2> atan2deg(const T1& x, const T2& y) { - return internal::in_atan<>::atan2deg(y, x); + return internal::atan2deg(x, y); } + template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_atan2deg, E1, E2> atan2deg(E1&& y, E2&& x) +KFR_INTRIN expr_func<internal::fn_atan2deg, E1, E2> atan2deg(E1&& x, E2&& y) { - return { fn_atan2deg(), std::forward<E1>(y), std::forward<E2>(x) }; -} + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } } -#pragma clang diagnostic pop diff --git a/include/kfr/base/clamp.hpp b/include/kfr/base/clamp.hpp @@ -0,0 +1,73 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "min_max.hpp" + +namespace kfr +{ + +namespace internal +{ + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> lo, vec<T, N> hi) +{ + return max(min(x, hi), lo); +} + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> hi) +{ + return max(min(x, hi), zerovector<T, N>()); +} + +KFR_I_FN(clamp) +} + +template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value), + typename Tout = common_type<T1, T2, T3>> +KFR_INTRIN Tout clamp(const T1& x, const T2& lo, const T3& hi) +{ + return internal::clamp(static_cast<Tout>(x), static_cast<Tout>(lo), static_cast<Tout>(hi)); +} + +template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> +KFR_INTRIN expr_func<internal::fn_clamp, E1, E2, E3> clamp(E1&& x, E2&& lo, E3&& hi) +{ + return { {}, std::forward<E1>(x), std::forward<E2>(lo), std::forward<E3>(hi) }; +} + +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value), + typename Tout = common_type<T1, T2>> +KFR_INTRIN Tout clamp(const T1& x, const T2& hi) +{ + return internal::clamp(static_cast<Tout>(x), static_cast<Tout>(hi)); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INTRIN expr_func<internal::fn_clamp, E1, E2> clamp(E1&& x, E2&& hi) +{ + return { {}, std::forward<E1>(x), std::forward<E2>(hi) }; +} +} diff --git a/include/kfr/base/complex.hpp b/include/kfr/base/complex.hpp @@ -79,9 +79,31 @@ struct complex T re; T im; }; - #endif #endif +} +namespace cometa +{ +template <typename T> +struct compound_type_traits<kfr::complex<T>> +{ + constexpr static size_t width = 2; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static bool is_scalar = false; + template <typename U> + using rebind = kfr::complex<U>; + template <typename U> + using deep_rebind = 
kfr::complex<cometa::deep_rebind<subtype, U>>; + + static constexpr subtype at(const kfr::complex<T>& value, size_t index) + { + return index == 0 ? value.real() : value.imag(); + } +}; +} +namespace kfr +{ using c32 = complex<f32>; using c64 = complex<f64>; @@ -262,363 +284,280 @@ constexpr KFR_INLINE complex<T> make_complex(T1 real, T2 imag = T2(0)) namespace internal { -template <cpu_t c = cpu_t::native> -struct in_complex : in_select<c>, in_sin_cos<c>, in_hyperbolic<c>, in_sqrt<c>, in_atan<c>, in_log_exp<c> -{ - constexpr static cpu_t cur = c; - using in_sqrt<c>::sqrt; - using in_sin_cos<c>::sincos; - using in_sin_cos<c>::cossin; - using in_hyperbolic<c>::sinhcosh; - using in_hyperbolic<c>::coshsinh; - using in_atan<c>::atan2; - using in_log_exp<c>::log; - using in_log_exp<c>::log2; - using in_log_exp<c>::log10; - using in_log_exp<c>::exp; - using in_log_exp<c>::exp2; - using in_log_exp<c>::exp10; - - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> csin(const vec<complex<T>, N>& x) - { - return ccomp(sincos(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x)))); - } - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> csinh(const vec<complex<T>, N>& x) - { - return ccomp(sinhcosh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); - } - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> ccos(const vec<complex<T>, N>& x) - { - return ccomp(negodd(cossin(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x))))); - } - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> ccosh(const vec<complex<T>, N>& x) - { - return ccomp(coshsinh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> cabs(const vec<complex<T>, N>& x) - { - const vec<T, N* 2> xx = sqr(cdecom(x)); - return sqrt(even(xx) + odd(xx)); - } - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> carg(const vec<complex<T>, N>& x) - { - const vec<T, N* 2> xx = cdecom(x); - return 
atan2(even(xx), odd(xx)); - } +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> csin(const vec<complex<T>, N>& x) +{ + return ccomp(sincos(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x)))); +} +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> csinh(const vec<complex<T>, N>& x) +{ + return ccomp(sinhcosh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); +} +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> ccos(const vec<complex<T>, N>& x) +{ + return ccomp(negodd(cossin(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x))))); +} +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> ccosh(const vec<complex<T>, N>& x) +{ + return ccomp(coshsinh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); +} - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> clog(const vec<complex<T>, N>& x) - { - return make_complex(log(cabs(x)), carg(x)); - } - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> clog2(const vec<complex<T>, N>& x) - { - return clog(x) * c_recip_log_2<T>; - } - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> clog10(const vec<complex<T>, N>& x) - { - return clog(x) * c_recip_log_10<T>; - } +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> cabs(const vec<complex<T>, N>& x) +{ + const vec<T, N* 2> xx = sqr(cdecom(x)); + return sqrt(even(xx) + odd(xx)); +} +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> carg(const vec<complex<T>, N>& x) +{ + const vec<T, N* 2> xx = cdecom(x); + return atan2(even(xx), odd(xx)); +} - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> cexp(const vec<complex<T>, N>& x) - { - return ccomp(exp(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); - } - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> cexp2(const vec<complex<T>, N>& x) - { - return cexp(x * c_log_2<T>); - } - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> cexp10(const vec<complex<T>, N>& x) - { - return 
cexp(x * c_log_10<T>); - } +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> clog(const vec<complex<T>, N>& x) +{ + return make_complex(log(cabs(x)), carg(x)); +} +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> clog2(const vec<complex<T>, N>& x) +{ + return clog(x) * c_recip_log_2<T>; +} +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> clog10(const vec<complex<T>, N>& x) +{ + return clog(x) * c_recip_log_10<T>; +} - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> polar(const vec<complex<T>, N>& x) - { - return make_complex(cabs(x), carg(x)); - } - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> cartesian(const vec<complex<T>, N>& x) - { - return cdupreal(x) * ccomp(cossin(cdecom(cdupimag(x)))); - } +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> cexp(const vec<complex<T>, N>& x) +{ + return ccomp(exp(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); +} +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> cexp2(const vec<complex<T>, N>& x) +{ + return cexp(x * c_log_2<T>); +} +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> cexp10(const vec<complex<T>, N>& x) +{ + return cexp(x * c_log_10<T>); +} - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> cabsdup(vec<T, N> x) - { - x = sqr(x); - return sqrt(x + swap<2>(x)); - } +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> polar(const vec<complex<T>, N>& x) +{ + return make_complex(cabs(x), carg(x)); +} +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> cartesian(const vec<complex<T>, N>& x) +{ + return cdupreal(x) * ccomp(cossin(cdecom(cdupimag(x)))); +} - template <typename T, size_t N> - KFR_SINTRIN vec<complex<T>, N> csqrt(const vec<complex<T>, N>& x) - { - const vec<T, N> t = (cabsdup(cdecom(x)) + cdecom(cnegimag(cdupreal(x)))) * T(0.5); - return ccomp(select(dupodd(x) < T(), cdecom(cnegimag(ccomp(t))), t)); - } +template <typename T, size_t N> 
+KFR_SINTRIN vec<T, N> cabsdup(vec<T, N> x) +{ + x = sqr(x); + return sqrt(x + swap<2>(x)); +} - KFR_HANDLE_SCALAR(csin) - KFR_HANDLE_SCALAR(csinh) - KFR_HANDLE_SCALAR(ccos) - KFR_HANDLE_SCALAR(ccosh) - KFR_HANDLE_SCALAR(cabs) - KFR_HANDLE_SCALAR(carg) - KFR_HANDLE_SCALAR(clog) - KFR_HANDLE_SCALAR(clog2) - KFR_HANDLE_SCALAR(clog10) - KFR_HANDLE_SCALAR(cexp) - KFR_HANDLE_SCALAR(cexp2) - KFR_HANDLE_SCALAR(cexp10) - KFR_HANDLE_SCALAR(polar) - KFR_HANDLE_SCALAR(cartesian) - KFR_HANDLE_SCALAR(csqrt) - - KFR_SPEC_FN(in_complex, csin) - KFR_SPEC_FN(in_complex, csinh) - KFR_SPEC_FN(in_complex, ccos) - KFR_SPEC_FN(in_complex, ccosh) - KFR_SPEC_FN(in_complex, cabs) - KFR_SPEC_FN(in_complex, carg) - KFR_SPEC_FN(in_complex, clog) - KFR_SPEC_FN(in_complex, clog2) - KFR_SPEC_FN(in_complex, clog10) - KFR_SPEC_FN(in_complex, cexp) - KFR_SPEC_FN(in_complex, cexp2) - KFR_SPEC_FN(in_complex, cexp10) - KFR_SPEC_FN(in_complex, polar) - KFR_SPEC_FN(in_complex, cartesian) - KFR_SPEC_FN(in_complex, csqrt) -}; +template <typename T, size_t N> +KFR_SINTRIN vec<complex<T>, N> csqrt(const vec<complex<T>, N>& x) +{ + const vec<T, N> t = (cabsdup(cdecom(x)) + cdecom(cnegimag(cdupreal(x)))) * T(0.5); + return ccomp(select(dupodd(x) < T(), cdecom(cnegimag(ccomp(t))), t)); +} + +KFR_HANDLE_SCALAR(csin) +KFR_HANDLE_SCALAR(csinh) +KFR_HANDLE_SCALAR(ccos) +KFR_HANDLE_SCALAR(ccosh) +KFR_HANDLE_SCALAR(cabs) +KFR_HANDLE_SCALAR(carg) +KFR_HANDLE_SCALAR(clog) +KFR_HANDLE_SCALAR(clog2) +KFR_HANDLE_SCALAR(clog10) +KFR_HANDLE_SCALAR(cexp) +KFR_HANDLE_SCALAR(cexp2) +KFR_HANDLE_SCALAR(cexp10) +KFR_HANDLE_SCALAR(polar) +KFR_HANDLE_SCALAR(cartesian) +KFR_HANDLE_SCALAR(csqrt) + +KFR_FN(csin) +KFR_FN(csinh) +KFR_FN(ccos) +KFR_FN(ccosh) +KFR_FN(cabs) +KFR_FN(carg) +KFR_FN(clog) +KFR_FN(clog2) +KFR_FN(clog10) +KFR_FN(cexp) +KFR_FN(cexp2) +KFR_FN(cexp10) +KFR_FN(polar) +KFR_FN(cartesian) +KFR_FN(csqrt) } -namespace native -{ -using fn_csin = internal::in_complex<>::fn_csin; template <typename T1, 
KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> csin(const T1& x) +KFR_INTRIN T1 csin(const T1& x) { - return internal::in_complex<>::csin(x); + return internal::csin(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_csin, E1> csin(E1&& x) +KFR_INTRIN expr_func<internal::fn_csin, E1> csin(E1&& x) { return { {}, std::forward<E1>(x) }; } - -using fn_csinh = internal::in_complex<>::fn_csinh; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> csinh(const T1& x) +KFR_INTRIN T1 csinh(const T1& x) { - return internal::in_complex<>::csinh(x); + return internal::csinh(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_csinh, E1> csinh(E1&& x) +KFR_INTRIN expr_func<internal::fn_csinh, E1> csinh(E1&& x) { return { {}, std::forward<E1>(x) }; } - -using fn_ccos = internal::in_complex<>::fn_ccos; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> ccos(const T1& x) +KFR_INTRIN T1 ccos(const T1& x) { - return internal::in_complex<>::ccos(x); + return internal::ccos(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_ccos, E1> ccos(E1&& x) +KFR_INTRIN expr_func<internal::fn_ccos, E1> ccos(E1&& x) { - return { fn_ccos(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_ccosh = internal::in_complex<>::fn_ccosh; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> ccosh(const T1& x) +KFR_INTRIN T1 ccosh(const T1& x) { - return internal::in_complex<>::ccosh(x); + return internal::ccosh(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_ccosh, E1> ccosh(E1&& x) +KFR_INTRIN expr_func<internal::fn_ccosh, E1> ccosh(E1&& x) { - return { fn_ccosh(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_cabs = internal::in_complex<>::fn_cabs; 
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN realftype<T1> cabs(const T1& x) +KFR_INTRIN realtype<T1> cabs(const T1& x) { - return internal::in_complex<>::cabs(x); + return internal::cabs(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cabs, E1> cabs(E1&& x) +KFR_INTRIN expr_func<internal::fn_cabs, E1> cabs(E1&& x) { - return { fn_cabs(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_carg = internal::in_complex<>::fn_carg; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN realftype<T1> carg(const T1& x) +KFR_INTRIN realtype<T1> carg(const T1& x) { - return internal::in_complex<>::carg(x); + return internal::carg(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_carg, E1> carg(E1&& x) +KFR_INTRIN expr_func<internal::fn_carg, E1> carg(E1&& x) { - return { fn_carg(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_clog = internal::in_complex<>::fn_clog; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> clog(const T1& x) +KFR_INTRIN T1 clog(const T1& x) { - return internal::in_complex<>::clog(x); + return internal::clog(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_clog, E1> clog(E1&& x) +KFR_INTRIN expr_func<internal::fn_clog, E1> clog(E1&& x) { - return { fn_clog(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_clog2 = internal::in_complex<>::fn_clog2; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> clog2(const T1& x) +KFR_INTRIN T1 clog2(const T1& x) { - return internal::in_complex<>::clog2(x); + return internal::clog2(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_clog2, E1> clog2(E1&& x) +KFR_INTRIN expr_func<internal::fn_clog2, E1> clog2(E1&& x) 
{ - return { fn_clog2(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_clog10 = internal::in_complex<>::fn_clog10; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> clog10(const T1& x) +KFR_INTRIN T1 clog10(const T1& x) { - return internal::in_complex<>::clog10(x); + return internal::clog10(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_clog10, E1> clog10(E1&& x) +KFR_INTRIN expr_func<internal::fn_clog10, E1> clog10(E1&& x) { - return { fn_clog10(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_cexp = internal::in_complex<>::fn_cexp; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cexp(const T1& x) +KFR_INTRIN T1 cexp(const T1& x) { - return internal::in_complex<>::cexp(x); + return internal::cexp(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cexp, E1> cexp(E1&& x) +KFR_INTRIN expr_func<internal::fn_cexp, E1> cexp(E1&& x) { - return { fn_cexp(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_cexp2 = internal::in_complex<>::fn_cexp2; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cexp2(const T1& x) +KFR_INTRIN T1 cexp2(const T1& x) { - return internal::in_complex<>::cexp2(x); + return internal::cexp2(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cexp2, E1> cexp2(E1&& x) +KFR_INTRIN expr_func<internal::fn_cexp2, E1> cexp2(E1&& x) { - return { fn_cexp2(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_cexp10 = internal::in_complex<>::fn_cexp10; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cexp10(const T1& x) +KFR_INTRIN T1 cexp10(const T1& x) { - return internal::in_complex<>::cexp10(x); + return internal::cexp10(x); } - template <typename E1, 
KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cexp10, E1> cexp10(E1&& x) +KFR_INTRIN expr_func<internal::fn_cexp10, E1> cexp10(E1&& x) { - return { fn_cexp10(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_polar = internal::in_complex<>::fn_polar; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> polar(const T1& x) +KFR_INTRIN T1 polar(const T1& x) { - return internal::in_complex<>::polar(x); + return internal::polar(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_polar, E1> polar(E1&& x) +KFR_INTRIN expr_func<internal::fn_polar, E1> polar(E1&& x) { - return { fn_polar(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_cartesian = internal::in_complex<>::fn_cartesian; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cartesian(const T1& x) +KFR_INTRIN T1 cartesian(const T1& x) { - return internal::in_complex<>::cartesian(x); + return internal::cartesian(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cartesian, E1> cartesian(E1&& x) +KFR_INTRIN expr_func<internal::fn_cartesian, E1> cartesian(E1&& x) { - return { fn_cartesian(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } - -using fn_csqrt = internal::in_complex<>::fn_csqrt; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> csqrt(const T1& x) +KFR_INTRIN T1 csqrt(const T1& x) { - return internal::in_complex<>::csqrt(x); + return internal::csqrt(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_csqrt, E1> csqrt(E1&& x) +KFR_INTRIN expr_func<internal::fn_csqrt, E1> csqrt(E1&& x) { - return { fn_csqrt(), std::forward<E1>(x) }; -} -} + return { {}, std::forward<E1>(x) }; } -namespace cometa -{ -template <typename T> -struct compound_type_traits<kfr::complex<T>> 
-{ - constexpr static size_t width = 2; - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static bool is_scalar = false; - template <typename U> - using rebind = kfr::complex<U>; - template <typename U> - using deep_rebind = kfr::complex<cometa::deep_rebind<subtype, U>>; - - static constexpr subtype at(const kfr::complex<T>& value, size_t index) - { - return index == 0 ? value.real() : value.imag(); - } -}; } #pragma clang diagnostic pop diff --git a/include/kfr/base/gamma.hpp b/include/kfr/base/gamma.hpp @@ -41,63 +41,51 @@ constexpr T gamma_precalc[] = { -0x1.62f981f01cf84p+8, 0x5.a937aa5c48d98p+0, -0x3.c640bf82e2104p-8, 0xc.914c540f959cp-24, }; -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_gamma : in_log_exp<cc> +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> gamma(vec<T, N> z) { -private: - using in_log_exp<cc>::exp; - using in_log_exp<cc>::pow; - -public: - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> gamma(vec<T, N> z) - { - constexpr size_t Count = arraysize(internal::gamma_precalc<T>); - vec<T, N> accm = gamma_precalc<T>[0]; - KFR_LOOP_UNROLL - for (size_t k = 1; k < Count; k++) - accm += gamma_precalc<T>[k] / (z + cast<utype<T>>(k)); - accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5); - return accm / z; - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> factorial_approx(vec<T, N> x) - { - return gamma(x + T(1)); - } - KFR_SPEC_FN(in_gamma, gamma) - KFR_SPEC_FN(in_gamma, factorial_approx) -}; + constexpr size_t Count = arraysize(gamma_precalc<T>); + vec<T, N> accm = gamma_precalc<T>[0]; + KFR_LOOP_UNROLL + for (size_t k = 1; k < Count; k++) + accm += gamma_precalc<T>[k] / (z + cast<utype<T>>(k)); + accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5); + return accm / z; } -namespace native +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> factorial_approx(vec<T, N> x) { -using fn_gamma = internal::in_gamma<>::fn_gamma; + return gamma(x + T(1)); +} +KFR_HANDLE_SCALAR(gamma) 
+KFR_HANDLE_SCALAR(factorial_approx) +KFR_FN(gamma) +KFR_FN(factorial_approx) +} + template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> gamma(const T1& x) +KFR_INTRIN T1 gamma(const T1& x) { - return internal::in_gamma<>::gamma(x); + return internal::gamma(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_gamma, E1> gamma(E1&& x) +KFR_INTRIN expr_func<internal::fn_gamma, E1> gamma(E1&& x) { - return { fn_gamma(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_factorial_approx = internal::in_gamma<>::fn_factorial_approx; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> factorial_approx(const T1& x) +KFR_INTRIN T1 factorial_approx(const T1& x) { - return internal::in_gamma<>::factorial_approx(x); + return internal::factorial_approx(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_factorial_approx, E1> factorial_approx(E1&& x) +KFR_INTRIN expr_func<internal::fn_factorial_approx, E1> factorial_approx(E1&& x) { - return { fn_factorial_approx(), std::forward<E1>(x) }; -} + return { {}, std::forward<E1>(x) }; } } diff --git a/include/kfr/base/hyperbolic.hpp b/include/kfr/base/hyperbolic.hpp @@ -35,149 +35,131 @@ namespace kfr namespace internal { -template <cpu_t c = cpu_t::native> -struct in_hyperbolic : in_log_exp<c> -{ - constexpr static cpu_t cur = c; - -private: - using in_log_exp<c>::exp; - -public: - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> sinh(vec<T, N> x) - { - return (exp(x) - exp(-x)) * T(0.5); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> cosh(vec<T, N> x) - { - return (exp(x) + exp(-x)) * T(0.5); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> tanh(vec<T, N> x) - { - x = -2 * x; - return (1 - exp(x)) / (1 + exp(x)); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> coth(vec<T, N> x) - { - x = -2 * x; - 
return (1 + exp(x)) / (1 - exp(x)); - } - - template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> - KFR_SINTRIN vec<T, N> sinhcosh(vec<T, N> x) - { - const vec<T, N> a = exp(x); - const vec<T, N> b = exp(-x); - return subadd(a, b) * T(0.5); - } - - template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> - KFR_SINTRIN vec<T, N> coshsinh(vec<T, N> x) - { - const vec<T, N> a = exp(x); - const vec<T, N> b = exp(-x); - return addsub(a, b) * T(0.5); - } - KFR_HANDLE_SCALAR(sinh) - KFR_HANDLE_SCALAR(cosh) - KFR_HANDLE_SCALAR(tanh) - KFR_HANDLE_SCALAR(coth) - KFR_HANDLE_SCALAR(sinhcosh) - KFR_HANDLE_SCALAR(coshsinh) - KFR_SPEC_FN(in_hyperbolic, sinh) - KFR_SPEC_FN(in_hyperbolic, cosh) - KFR_SPEC_FN(in_hyperbolic, tanh) - KFR_SPEC_FN(in_hyperbolic, coth) - KFR_SPEC_FN(in_hyperbolic, sinhcosh) - KFR_SPEC_FN(in_hyperbolic, coshsinh) -}; -} - -namespace native -{ -using fn_sinh = internal::in_hyperbolic<>::fn_sinh; +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> sinh(vec<T, N> x) +{ + return (exp(x) - exp(-x)) * T(0.5); +} + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> cosh(vec<T, N> x) +{ + return (exp(x) + exp(-x)) * T(0.5); +} + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> tanh(vec<T, N> x) +{ + x = -2 * x; + return (1 - exp(x)) / (1 + exp(x)); +} + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> coth(vec<T, N> x) +{ + x = -2 * x; + return (1 + exp(x)) / (1 - exp(x)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> +KFR_SINTRIN vec<T, N> sinhcosh(vec<T, N> x) +{ + const vec<T, N> a = exp(x); + const vec<T, N> b = exp(-x); + return subadd(a, b) * T(0.5); +} + +template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> +KFR_SINTRIN vec<T, N> coshsinh(vec<T, N> x) +{ + const vec<T, N> a = exp(x); + const vec<T, N> b = exp(-x); + return addsub(a, b) * T(0.5); +} + +KFR_HANDLE_SCALAR(sinh) +KFR_HANDLE_SCALAR(cosh) +KFR_HANDLE_SCALAR(tanh) +KFR_HANDLE_SCALAR(coth) +KFR_HANDLE_SCALAR(sinhcosh) +KFR_HANDLE_SCALAR(coshsinh) +KFR_FN(sinh) 
+KFR_FN(cosh) +KFR_FN(tanh) +KFR_FN(coth) +KFR_FN(sinhcosh) +KFR_FN(coshsinh) +} + template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sinh(const T1& x) +KFR_INTRIN T1 sinh(const T1& x) { - return internal::in_hyperbolic<>::sinh(x); + return internal::sinh(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sinh, E1> sinh(E1&& x) +KFR_INTRIN expr_func<internal::fn_sinh, E1> sinh(E1&& x) { - return { fn_sinh(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_cosh = internal::in_hyperbolic<>::fn_cosh; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cosh(const T1& x) +KFR_INTRIN T1 cosh(const T1& x) { - return internal::in_hyperbolic<>::cosh(x); + return internal::cosh(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cosh, E1> cosh(E1&& x) +KFR_INTRIN expr_func<internal::fn_cosh, E1> cosh(E1&& x) { - return { fn_cosh(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_tanh = internal::in_hyperbolic<>::fn_tanh; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> tanh(const T1& x) +KFR_INTRIN T1 tanh(const T1& x) { - return internal::in_hyperbolic<>::tanh(x); + return internal::tanh(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_tanh, E1> tanh(E1&& x) +KFR_INTRIN expr_func<internal::fn_tanh, E1> tanh(E1&& x) { - return { fn_tanh(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_coth = internal::in_hyperbolic<>::fn_coth; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> coth(const T1& x) +KFR_INTRIN T1 coth(const T1& x) { - return internal::in_hyperbolic<>::coth(x); + return internal::coth(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_coth, E1> coth(E1&& x) 
+KFR_INTRIN expr_func<internal::fn_coth, E1> coth(E1&& x) { - return { fn_coth(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_sinhcosh = internal::in_hyperbolic<>::fn_sinhcosh; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sinhcosh(const T1& x) +KFR_INTRIN T1 sinhcosh(const T1& x) { - return internal::in_hyperbolic<>::sinhcosh(x); + return internal::sinhcosh(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sinhcosh, E1> sinhcosh(E1&& x) +KFR_INTRIN expr_func<internal::fn_sinhcosh, E1> sinhcosh(E1&& x) { - return { fn_sinhcosh(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_coshsinh = internal::in_hyperbolic<>::fn_coshsinh; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> coshsinh(const T1& x) +KFR_INTRIN T1 coshsinh(const T1& x) { - return internal::in_hyperbolic<>::coshsinh(x); + return internal::coshsinh(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_coshsinh, E1> coshsinh(E1&& x) +KFR_INTRIN expr_func<internal::fn_coshsinh, E1> coshsinh(E1&& x) { - return { fn_coshsinh(), std::forward<E1>(x) }; -} + return { {}, std::forward<E1>(x) }; } } diff --git a/include/kfr/base/log_exp.hpp b/include/kfr/base/log_exp.hpp @@ -26,518 +26,490 @@ #include "constants.hpp" #include "function.hpp" #include "min_max.hpp" +#include "clamp.hpp" #include "operators.hpp" #include "round.hpp" #include "select.hpp" #include "shuffle.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { namespace internal { -template <cpu_t c = cpu_t::native> -struct in_log_exp : in_select<c>, in_min_max<c>, in_clamp<c>, in_round<c>, in_abs<c> -{ -private: - constexpr static cpu_t cur = c; - using in_select<c>::select; - using in_round<c>::floor; - using 
in_clamp<c>::clamp; - using in_abs<c>::abs; - -public: - template <size_t N> - KFR_SINTRIN vec<i32, N> vilogbp1(vec<f32, N> d) - { - mask<i32, N> m = d < 5.421010862427522E-20f; - d = select(m, 1.8446744073709552E19f * d, d); - vec<i32, N> q = (ibitcast(d) >> 23) & 0xff; - q = select(m, q - (64 + 0x7e), q - 0x7e); - return q; - } - - template <size_t N> - KFR_SINTRIN vec<i64, N> vilogbp1(vec<f64, N> d) - { - mask<i64, N> m = d < 4.9090934652977266E-91; - d = select(m, 2.037035976334486E90 * d, d); - vec<i64, N> q = (ibitcast(d) >> 52) & 0x7ff; - q = select(m, q - (300 + 0x03fe), q - 0x03fe); - return q; - } - - template <size_t N> - KFR_SINTRIN vec<f32, N> vldexpk(vec<f32, N> x, vec<i32, N> q) - { - vec<i32, N> m = q >> 31; - m = (((m + q) >> 6) - m) << 4; - q = q - (m << 2); - m = clamp(m + 0x7f, vec<i32, N>(0xff)); - vec<f32, N> u = pow4(bitcast<f32>(cast<i32>(m) << 23)); - return x * u * bitcast<f32>((cast<i32>(q + 0x7f)) << 23); - } - - template <size_t N> - KFR_SINTRIN vec<f64, N> vldexpk(vec<f64, N> x, vec<i64, N> q) - { - vec<i64, N> m = q >> 31; - m = (((m + q) >> 9) - m) << 7; - q = q - (m << 2); - m = clamp(m + 0x3ff, i64(0x7ff)); - vec<f64, N> u = pow4(bitcast<f64>(cast<i64>(m) << 52)); - return x * u * bitcast<f64>((cast<i64>(q + 0x3ff)) << 52); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> logb(vec<T, N> x) - { - return select(x == T(), -c_infinity<T>, cast<T>(vilogbp1(x) - 1)); - } - - template <size_t N> - KFR_SINTRIN vec<f32, N> log(vec<f32, N> d) - { - vec<i32, N> e = vilogbp1(d * 0.7071); // 0678118654752440084436210485f ); - vec<f32, N> m = vldexpk(d, -e); - - vec<f32, N> x = (m - 1.0f) / (m + 1.0f); - vec<f32, N> x2 = x * x; - - vec<f32, N> sp = select(d < 0, c_qnan<f32>, c_neginfinity<f32>); - - vec<f32, N> t = 0.2371599674224853515625f; - t = fmadd(t, x2, 0.285279005765914916992188f); - t = fmadd(t, x2, 0.400005519390106201171875f); - t = fmadd(t, x2, 0.666666567325592041015625f); - t = fmadd(t, x2, 2.0f); - - x = x * t + 
c_log_2<f32> * cast<f32>(e); - x = select(d > 0, x, sp); - - return x; - } - - template <size_t N> - KFR_SINTRIN vec<f64, N> log(vec<f64, N> d) - { - vec<i64, N> e = vilogbp1(d * 0.7071); // 0678118654752440084436210485 ); - vec<f64, N> m = vldexpk(d, -e); - - vec<f64, N> x = (m - 1.0) / (m + 1.0); - vec<f64, N> x2 = x * x; - - vec<f64, N> sp = select(d < 0, c_qnan<f64>, c_neginfinity<f64>); - - vec<f64, N> t = 0.148197055177935105296783; - t = fmadd(t, x2, 0.153108178020442575739679); - t = fmadd(t, x2, 0.181837339521549679055568); - t = fmadd(t, x2, 0.22222194152736701733275); - t = fmadd(t, x2, 0.285714288030134544449368); - t = fmadd(t, x2, 0.399999999989941956712869); - t = fmadd(t, x2, 0.666666666666685503450651); - t = fmadd(t, x2, 2); - - x = x * t + c_log_2<f64> * cast<f64>(e); - x = select(d > 0, x, sp); - - return x; - } - - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> log2(vec<T, N> x) - { - return log(x) * c_recip_log_2<T>; - } - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> log10(vec<T, N> x) - { - return log(x) * c_recip_log_10<T>; - } - - template <size_t N> - KFR_SINTRIN vec<f32, N> exp(vec<f32, N> d) - { - const f32 ln2_part1 = 0.6931457519f; - const f32 ln2_part2 = 1.4286067653e-6f; - - vec<i32, N> q = cast<i32>(floor(d * c_recip_log_2<f32>)); - vec<f32, N> s, u; - - s = fmadd(cast<f32>(q), -ln2_part1, d); - s = fmadd(cast<f32>(q), -ln2_part2, s); - - const f32 c2 = 0.4999999105930328369140625f; - const f32 c3 = 0.166668415069580078125f; - const f32 c4 = 4.16539050638675689697265625e-2f; - const f32 c5 = 8.378830738365650177001953125e-3f; - const f32 c6 = 1.304379315115511417388916015625e-3f; - const f32 c7 = 2.7555381529964506626129150390625e-4f; - - u = c7; - u = fmadd(u, s, c6); - u = fmadd(u, s, c5); - u = fmadd(u, s, c4); - u = fmadd(u, s, c3); - u = fmadd(u, s, c2); - - u = s * s * u + s + 1.0f; - u = vldexpk(u, q); - - u = select(d == 
c_neginfinity<f32>, 0.f, u); - - return u; - } - - template <size_t N> - KFR_SINTRIN vec<f64, N> exp(vec<f64, N> d) - { - const f64 ln2_part1 = 0.69314717501401901245; - const f64 ln2_part2 = 5.545926273775592108e-009; - - vec<i64, N> q = cast<i64>(floor(d * c_recip_log_2<f64>)); - vec<f64, N> s, u; - - s = fmadd(cast<f64>(q), -ln2_part1, d); - s = fmadd(cast<f64>(q), -ln2_part2, s); - - const f64 c2 = 0.499999999999994948485237955537741072475910186767578; - const f64 c3 = 0.166666666667024204739888659787538927048444747924805; - const f64 c4 = 4.16666666578945840693215529881854308769106864929199e-2; - const f64 c5 = 8.3333334397461874404333670440792047884315252304077e-3; - const f64 c6 = 1.3888881489747750223179290074426717183087021112442e-3; - const f64 c7 = 1.9841587032493949419205414574918222569976933300495e-4; - const f64 c8 = 2.47929324077393282239802768662784160369483288377523e-5; - const f64 c9 = 2.77076037925831049422552981864598109496000688523054e-6; - const f64 c10 = 2.59589616274586264243611237120812340606335055781528e-7; - const f64 c11 = 3.43801438838789632454461529017381016259946591162588e-8; - - u = c11; - u = fmadd(u, s, c10); - u = fmadd(u, s, c9); - u = fmadd(u, s, c8); - u = fmadd(u, s, c7); - u = fmadd(u, s, c6); - u = fmadd(u, s, c5); - u = fmadd(u, s, c4); - u = fmadd(u, s, c3); - u = fmadd(u, s, c2); - - u = s * s * u + s + 1.0; - u = vldexpk(u, q); - - u = select(d == c_neginfinity<f64>, 0.0, u); - - return u; - } - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> exp2(vec<T, N> x) - { - return exp(x * c_log_2<T>); - } - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> exp10(vec<T, N> x) - { - return exp(x * c_log_10<T>); - } - - template <typename T1, typename T2> - KFR_SINTRIN common_type<T1, T2> logn(const T1& a, const T2& b) - { - return log(a) / log(b); - } - - template <typename T1, typename T2> - KFR_SINTRIN common_type<T1, T2> logm(const T1& a, 
const T2& b) - { - return log(a) * b; - } - - template <typename T1, typename T2, typename T3> - KFR_SINTRIN common_type<T1, T2, T3> exp_fmadd(const T1& x, const T2& m, const T3& a) - { - return exp(fmadd(x, m, a)); - } - - template <typename T1, typename T2, typename T3> - KFR_SINTRIN common_type<T1, T2, T3> log_fmadd(const T1& x, const T2& m, const T3& a) - { - return fmadd(log(x), m, a); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> pow(vec<T, N> a, vec<T, N> b) - { - const vec<T, N> t = exp(b * log(abs(a))); - const mask<T, N> isint = floor(b) == b; - const mask<T, N> iseven = (cast<itype<T>>(b) & 1) == 0; - return select(a > T(), t, - select(a == T(), T(1), select(isint, select(iseven, t, -t), broadcast<N>(c_qnan<T>)))); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> root(vec<T, N> x, vec<T, N> b) - { - return exp(reciprocal(b) * log(x)); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> cbrt(vec<T, N> x) - { - return pow<T, N>(x, T(0.333333333333333333333333333333333)); - } - - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> exp(vec<T, N> x) - { - return exp(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> exp2(vec<T, N> x) - { - return exp2(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> exp10(vec<T, N> x) - { - return exp10(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> log(vec<T, N> x) - { - return log(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> log2(vec<T, N> x) - { - return log2(cast<Tout>(x)); - } - template <typename T, size_t N, 
KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> log10(vec<T, N> x) - { - return log10(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> cbrt(vec<T, N> x) - { - return cbrt(cast<Tout>(x)); - } - - KFR_HANDLE_SCALAR(exp) - KFR_HANDLE_SCALAR(exp2) - KFR_HANDLE_SCALAR(exp10) - KFR_HANDLE_SCALAR(log) - KFR_HANDLE_SCALAR(log2) - KFR_HANDLE_SCALAR(log10) - KFR_HANDLE_SCALAR(logb) - KFR_HANDLE_SCALAR(pow) - KFR_HANDLE_SCALAR(root) - KFR_HANDLE_SCALAR(cbrt) - - KFR_SPEC_FN(in_log_exp, exp) - KFR_SPEC_FN(in_log_exp, exp2) - KFR_SPEC_FN(in_log_exp, exp10) - KFR_SPEC_FN(in_log_exp, log) - KFR_SPEC_FN(in_log_exp, log2) - KFR_SPEC_FN(in_log_exp, log10) - KFR_SPEC_FN(in_log_exp, logb) - KFR_SPEC_FN(in_log_exp, logn) - KFR_SPEC_FN(in_log_exp, logm) - KFR_SPEC_FN(in_log_exp, exp_fmadd) - KFR_SPEC_FN(in_log_exp, log_fmadd) - KFR_SPEC_FN(in_log_exp, pow) - KFR_SPEC_FN(in_log_exp, root) - KFR_SPEC_FN(in_log_exp, cbrt) -}; -} -namespace native -{ -using fn_exp = internal::in_log_exp<>::fn_exp; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> exp(const T1& x) +template <size_t N> +KFR_SINTRIN vec<i32, N> vilogbp1(vec<f32, N> d) { - return internal::in_log_exp<>::exp(x); + mask<i32, N> m = d < 5.421010862427522E-20f; + d = select(m, 1.8446744073709552E19f * d, d); + vec<i32, N> q = (ibitcast(d) >> 23) & 0xff; + q = select(m, q - (64 + 0x7e), q - 0x7e); + return q; } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_exp, E1> exp(E1&& x) +template <size_t N> +KFR_SINTRIN vec<i64, N> vilogbp1(vec<f64, N> d) { - return { fn_exp(), std::forward<E1>(x) }; + mask<i64, N> m = d < 4.9090934652977266E-91; + d = select(m, 2.037035976334486E90 * d, d); + vec<i64, N> q = (ibitcast(d) >> 52) & 0x7ff; + q = select(m, q - (300 + 0x03fe), q - 0x03fe); + return q; } -using fn_exp2 = 
internal::in_log_exp<>::fn_exp2; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> exp2(const T1& x) +template <size_t N> +KFR_SINTRIN vec<f32, N> vldexpk(vec<f32, N> x, vec<i32, N> q) { - return internal::in_log_exp<>::exp2(x); + vec<i32, N> m = q >> 31; + m = (((m + q) >> 6) - m) << 4; + q = q - (m << 2); + m = clamp(m + 0x7f, vec<i32, N>(0xff)); + vec<f32, N> u = pow4(bitcast<f32>(cast<i32>(m) << 23)); + return x * u * bitcast<f32>((cast<i32>(q + 0x7f)) << 23); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_exp2, E1> exp2(E1&& x) +template <size_t N> +KFR_SINTRIN vec<f64, N> vldexpk(vec<f64, N> x, vec<i64, N> q) +{ + vec<i64, N> m = q >> 31; + m = (((m + q) >> 9) - m) << 7; + q = q - (m << 2); + m = clamp(m + 0x3ff, i64(0x7ff)); + vec<f64, N> u = pow4(bitcast<f64>(cast<i64>(m) << 52)); + return x * u * bitcast<f64>((cast<i64>(q + 0x3ff)) << 52); +} + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> logb(vec<T, N> x) +{ + return select(x == T(), -c_infinity<T>, cast<T>(vilogbp1(x) - 1)); +} + +template <size_t N> +KFR_SINTRIN vec<f32, N> log(vec<f32, N> d) +{ + vec<i32, N> e = vilogbp1(d * 0.7071); // 0678118654752440084436210485f ); + vec<f32, N> m = vldexpk(d, -e); + + vec<f32, N> x = (m - 1.0f) / (m + 1.0f); + vec<f32, N> x2 = x * x; + + vec<f32, N> sp = select(d < 0, c_qnan<f32>, c_neginfinity<f32>); + + vec<f32, N> t = 0.2371599674224853515625f; + t = fmadd(t, x2, 0.285279005765914916992188f); + t = fmadd(t, x2, 0.400005519390106201171875f); + t = fmadd(t, x2, 0.666666567325592041015625f); + t = fmadd(t, x2, 2.0f); + + x = x * t + c_log_2<f32> * cast<f32>(e); + x = select(d > 0, x, sp); + + return x; +} + +template <size_t N> +KFR_SINTRIN vec<f64, N> log(vec<f64, N> d) +{ + vec<i64, N> e = vilogbp1(d * 0.7071); // 0678118654752440084436210485 ); + vec<f64, N> m = vldexpk(d, -e); + + vec<f64, N> x = (m - 1.0) / (m + 1.0); + vec<f64, N> x2 = x * x; + + vec<f64, N> sp = 
select(d < 0, c_qnan<f64>, c_neginfinity<f64>); + + vec<f64, N> t = 0.148197055177935105296783; + t = fmadd(t, x2, 0.153108178020442575739679); + t = fmadd(t, x2, 0.181837339521549679055568); + t = fmadd(t, x2, 0.22222194152736701733275); + t = fmadd(t, x2, 0.285714288030134544449368); + t = fmadd(t, x2, 0.399999999989941956712869); + t = fmadd(t, x2, 0.666666666666685503450651); + t = fmadd(t, x2, 2); + + x = x * t + c_log_2<f64> * cast<f64>(e); + x = select(d > 0, x, sp); + + return x; +} + +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> log2(vec<T, N> x) +{ + return log(x) * c_recip_log_2<T>; +} +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> log10(vec<T, N> x) +{ + return log(x) * c_recip_log_10<T>; +} + +template <size_t N> +KFR_SINTRIN vec<f32, N> exp(vec<f32, N> d) +{ + const f32 ln2_part1 = 0.6931457519f; + const f32 ln2_part2 = 1.4286067653e-6f; + + vec<i32, N> q = cast<i32>(floor(d * c_recip_log_2<f32>)); + vec<f32, N> s, u; + + s = fmadd(cast<f32>(q), -ln2_part1, d); + s = fmadd(cast<f32>(q), -ln2_part2, s); + + const f32 c2 = 0.4999999105930328369140625f; + const f32 c3 = 0.166668415069580078125f; + const f32 c4 = 4.16539050638675689697265625e-2f; + const f32 c5 = 8.378830738365650177001953125e-3f; + const f32 c6 = 1.304379315115511417388916015625e-3f; + const f32 c7 = 2.7555381529964506626129150390625e-4f; + + u = c7; + u = fmadd(u, s, c6); + u = fmadd(u, s, c5); + u = fmadd(u, s, c4); + u = fmadd(u, s, c3); + u = fmadd(u, s, c2); + + u = s * s * u + s + 1.0f; + u = vldexpk(u, q); + + u = select(d == c_neginfinity<f32>, 0.f, u); + + return u; +} + +template <size_t N> +KFR_SINTRIN vec<f64, N> exp(vec<f64, N> d) +{ + const f64 ln2_part1 = 0.69314717501401901245; + const f64 ln2_part2 = 5.545926273775592108e-009; + + vec<i64, N> q = cast<i64>(floor(d * c_recip_log_2<f64>)); + vec<f64, N> s, u; + + s = fmadd(cast<f64>(q), -ln2_part1, d); + s = fmadd(cast<f64>(q), 
-ln2_part2, s); + + const f64 c2 = 0.499999999999994948485237955537741072475910186767578; + const f64 c3 = 0.166666666667024204739888659787538927048444747924805; + const f64 c4 = 4.16666666578945840693215529881854308769106864929199e-2; + const f64 c5 = 8.3333334397461874404333670440792047884315252304077e-3; + const f64 c6 = 1.3888881489747750223179290074426717183087021112442e-3; + const f64 c7 = 1.9841587032493949419205414574918222569976933300495e-4; + const f64 c8 = 2.47929324077393282239802768662784160369483288377523e-5; + const f64 c9 = 2.77076037925831049422552981864598109496000688523054e-6; + const f64 c10 = 2.59589616274586264243611237120812340606335055781528e-7; + const f64 c11 = 3.43801438838789632454461529017381016259946591162588e-8; + + u = c11; + u = fmadd(u, s, c10); + u = fmadd(u, s, c9); + u = fmadd(u, s, c8); + u = fmadd(u, s, c7); + u = fmadd(u, s, c6); + u = fmadd(u, s, c5); + u = fmadd(u, s, c4); + u = fmadd(u, s, c3); + u = fmadd(u, s, c2); + + u = s * s * u + s + 1.0; + u = vldexpk(u, q); + + u = select(d == c_neginfinity<f64>, 0.0, u); + + return u; +} +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> exp2(vec<T, N> x) +{ + return exp(x * c_log_2<T>); +} +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> exp10(vec<T, N> x) +{ + return exp(x * c_log_10<T>); +} + +template <typename T1, typename T2> +KFR_SINTRIN common_type<T1, T2> logn(const T1& a, const T2& b) +{ + return log(a) / log(b); +} + +template <typename T1, typename T2> +KFR_SINTRIN common_type<T1, T2> logm(const T1& a, const T2& b) +{ + return log(a) * b; +} + +template <typename T1, typename T2, typename T3> +KFR_SINTRIN common_type<T1, T2, T3> exp_fmadd(const T1& x, const T2& m, const T3& a) { - return { fn_exp2(), std::forward<E1>(x) }; + return exp(fmadd(x, m, a)); +} + +template <typename T1, typename T2, typename T3> +KFR_SINTRIN common_type<T1, T2, T3> log_fmadd(const T1& x, const T2& m, const 
T3& a) +{ + return fmadd(log(x), m, a); +} + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> pow(vec<T, N> a, vec<T, N> b) +{ + const vec<T, N> t = exp(b * log(abs(a))); + const mask<T, N> isint = floor(b) == b; + const mask<T, N> iseven = (cast<itype<T>>(b) & 1) == 0; + return select(a > T(), t, + select(a == T(), T(1), select(isint, select(iseven, t, -t), broadcast<N>(c_qnan<T>)))); +} + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> root(vec<T, N> x, vec<T, N> b) +{ + return exp(reciprocal(b) * log(x)); +} + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> cbrt(vec<T, N> x) +{ + return pow<T, N>(x, T(0.333333333333333333333333333333333)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> exp(vec<T, N> x) +{ + return exp(cast<Tout>(x)); +} +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> exp2(vec<T, N> x) +{ + return exp2(cast<Tout>(x)); +} +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> exp10(vec<T, N> x) +{ + return exp10(cast<Tout>(x)); +} +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> log(vec<T, N> x) +{ + return log(cast<Tout>(x)); +} +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> log2(vec<T, N> x) +{ + return log2(cast<Tout>(x)); +} +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> log10(vec<T, N> x) +{ + return log10(cast<Tout>(x)); +} +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> cbrt(vec<T, N> x) +{ + return cbrt(cast<Tout>(x)); +} + +KFR_HANDLE_SCALAR(exp) +KFR_HANDLE_SCALAR(exp2) +KFR_HANDLE_SCALAR(exp10) 
+KFR_HANDLE_SCALAR(log) +KFR_HANDLE_SCALAR(log2) +KFR_HANDLE_SCALAR(log10) +KFR_HANDLE_SCALAR(logb) +KFR_HANDLE_SCALAR(pow) +KFR_HANDLE_SCALAR(root) +KFR_HANDLE_SCALAR(cbrt) + +KFR_FN(exp) +KFR_FN(exp2) +KFR_FN(exp10) +KFR_FN(log) +KFR_FN(log2) +KFR_FN(log10) +KFR_FN(logb) +KFR_FN(logn) +KFR_FN(logm) +KFR_FN(exp_fmadd) +KFR_FN(log_fmadd) +KFR_FN(pow) +KFR_FN(root) +KFR_FN(cbrt) } -using fn_exp10 = internal::in_log_exp<>::fn_exp10; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> exp10(const T1& x) +KFR_INTRIN T1 exp(const T1& x) { - return internal::in_log_exp<>::exp10(x); + return internal::exp(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_exp10, E1> exp10(E1&& x) +KFR_INTRIN expr_func<internal::fn_exp, E1> exp(E1&& x) { - return { fn_exp10(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_log = internal::in_log_exp<>::fn_log; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> log(const T1& x) +KFR_INTRIN T1 exp2(const T1& x) { - return internal::in_log_exp<>::log(x); + return internal::exp2(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_log, E1> log(E1&& x) +KFR_INTRIN expr_func<internal::fn_exp2, E1> exp2(E1&& x) { - return { fn_log(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_log2 = internal::in_log_exp<>::fn_log2; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> log2(const T1& x) +KFR_INTRIN T1 exp10(const T1& x) { - return internal::in_log_exp<>::log2(x); + return internal::exp10(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_log2, E1> log2(E1&& x) +KFR_INTRIN expr_func<internal::fn_exp10, E1> exp10(E1&& x) { - return { fn_log2(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_log10 = internal::in_log_exp<>::fn_log10; 
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> log10(const T1& x) +KFR_INTRIN T1 log(const T1& x) { - return internal::in_log_exp<>::log10(x); + return internal::log(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_log10, E1> log10(E1&& x) +KFR_INTRIN expr_func<internal::fn_log, E1> log(E1&& x) { - return { fn_log10(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_logb = internal::in_log_exp<>::fn_logb; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> logb(const T1& x) +KFR_INTRIN T1 log2(const T1& x) { - return internal::in_log_exp<>::logb(x); + return internal::log2(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_logb, E1> logb(E1&& x) +KFR_INTRIN expr_func<internal::fn_log2, E1> log2(E1&& x) { - return { fn_logb(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_logn = internal::in_log_exp<>::fn_logn; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> logn(const T1& x) +KFR_INTRIN T1 log10(const T1& x) { - return internal::in_log_exp<>::logn(x); + return internal::log10(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_logn, E1> logn(E1&& x) +KFR_INTRIN expr_func<internal::fn_log10, E1> log10(E1&& x) { - return { fn_logn(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_logm = internal::in_log_exp<>::fn_logm; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> logm(const T1& x) +KFR_INTRIN T1 logb(const T1& x) { - return internal::in_log_exp<>::logm(x); + return internal::logb(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_logm, E1> logm(E1&& x) +KFR_INTRIN expr_func<internal::fn_logb, E1> logb(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} + 
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INTRIN common_type<T1, T2> logn(const T1& x, const T2& y) +{ + return internal::logn(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INTRIN expr_func<internal::fn_logn, E1, E2> logn(E1&& x, E2&& y) +{ + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; +} + +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INTRIN common_type<T1, T2> logm(const T1& x, const T2& y) { - return { fn_logm(), std::forward<E1>(x) }; + return internal::logm(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INTRIN expr_func<internal::fn_logm, E1, E2> logm(E1&& x, E2&& y) +{ + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } -using fn_exp_fmadd = internal::in_log_exp<>::fn_exp_fmadd; template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> -KFR_INLINE ftype<common_type<T1, T2, T3>> exp_fmadd(const T1& x, const T2& m, const T3& a) +KFR_INTRIN common_type<T1, T2, T3> exp_fmadd(const T1& x, const T2& y, const T3& z) { - return internal::in_log_exp<>::exp_fmadd(x, m, a); + return internal::exp_fmadd(x, y, z); } template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> -KFR_INLINE expr_func<fn_exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& m, E3&& a) +KFR_INTRIN expr_func<internal::fn_exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& y, E3&& z) { - return { fn_exp_fmadd(), std::forward<E1>(x), std::forward<E2>(m), std::forward<E3>(a) }; + return { {}, std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) }; } -using fn_log_fmadd = internal::in_log_exp<>::fn_log_fmadd; + template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> -KFR_INLINE ftype<common_type<T1, T2, T3>> log_fmadd(const T1& x, const T2& m, const 
T3& a) +KFR_INTRIN common_type<T1, T2, T3> log_fmadd(const T1& x, const T2& y, const T3& z) { - return internal::in_log_exp<>::log_fmadd(x, m, a); + return internal::log_fmadd(x, y, z); } template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> -KFR_INLINE expr_func<fn_log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& m, E3&& a) +KFR_INTRIN expr_func<internal::fn_log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& y, E3&& z) { - return { fn_log_fmadd(), std::forward<E1>(x), std::forward<E2>(m), std::forward<E3>(a) }; + return { {}, std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) }; } -using fn_pow = internal::in_log_exp<>::fn_pow; template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> pow(const T1& x, const T2& b) +KFR_INTRIN common_type<T1, T2> pow(const T1& x, const T2& y) { - return internal::in_log_exp<>::pow(x, b); + return internal::pow(x, y); } template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_pow, E1, E2> pow(E1&& x, E2&& b) +KFR_INTRIN expr_func<internal::fn_pow, E1, E2> pow(E1&& x, E2&& y) { - return { fn_pow(), std::forward<E1>(x), std::forward<E2>(b) }; + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } -using fn_root = internal::in_log_exp<>::fn_root; + template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> root(const T1& x, const T2& b) +KFR_INTRIN common_type<T1, T2> root(const T1& x, const T2& y) { - return internal::in_log_exp<>::root(x, b); + return internal::root(x, y); } template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_root, E1, E2> root(E1&& x, E2&& b) +KFR_INTRIN expr_func<internal::fn_root, E1, E2> root(E1&& x, E2&& y) { - return { fn_root(), std::forward<E1>(x), std::forward<E2>(b) }; + return { {}, std::forward<E1>(x), 
std::forward<E2>(y) }; } -using fn_cbrt = internal::in_log_exp<>::fn_cbrt; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cbrt(const T1& x) +KFR_INTRIN T1 cbrt(const T1& x) { - return internal::in_log_exp<>::cbrt(x); + return internal::cbrt(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cbrt, E1> cbrt(E1&& x) +KFR_INTRIN expr_func<internal::fn_cbrt, E1> cbrt(E1&& x) { - return { fn_cbrt(), std::forward<E1>(x) }; -} -} + return { {}, std::forward<E1>(x) }; } -#pragma clang diagnostic pop + + + +} diff --git a/include/kfr/base/logical.hpp b/include/kfr/base/logical.hpp @@ -28,30 +28,22 @@ namespace kfr { +namespace internal +{ + template <size_t bits> struct bitmask { using type = findinttype<0, (1ull << bits) - 1>; + bitmask(type val) : value(val) {} + template <typename Itype> bitmask(Itype val) : value(static_cast<type>(val)) { } - type value; -}; - -namespace internal -{ -template <cpu_t c = cpu_t::native> -struct in_bittest : in_bittest<older(c)> -{ - struct fn_bittestnone : fn_disabled - { - }; - struct fn_bittestall : fn_disabled - { - }; + type value; }; struct logical_and @@ -61,6 +53,7 @@ struct logical_and { return x && y; } + template <typename T> T operator()(initialvalue<T>) { @@ -68,307 +61,190 @@ struct logical_and } }; -template <> -struct in_bittest<cpu_t::common> -{ - constexpr static cpu_t cpu = cpu_t::common; - - template <typename T, size_t N> - KFR_SINTRIN bitmask<N> getmask(vec<T, N> x) - { - typename bitmask<N>::type val = 0; - for (size_t i = 0; i < N; i++) - { - val |= (ubitcast(x[i]) >> (typebits<T>::bits - 1)) << i; - } - return val; - } +#if defined CID_ARCH_SSE41 + +KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return _mm_testz_ps(*x, *y); } +KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return _mm_testz_pd(*x, *y); } +KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return _mm_testz_si128(*x, *y); } +KFR_SINTRIN bool bittestnone(u16sse 
x, u16sse y) { return _mm_testz_si128(*x, *y); } +KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return _mm_testz_si128(*x, *y); } +KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return _mm_testz_si128(*x, *y); } +KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return _mm_testz_si128(*x, *y); } +KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return _mm_testz_si128(*x, *y); } +KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return _mm_testz_si128(*x, *y); } +KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return _mm_testz_si128(*x, *y); } +KFR_SINTRIN bool bittestnone(f32sse x) { return _mm_testz_ps(*x, *x); } +KFR_SINTRIN bool bittestnone(f64sse x) { return _mm_testz_pd(*x, *x); } +KFR_SINTRIN bool bittestnone(u8sse x) { return _mm_testz_si128(*x, *x); } +KFR_SINTRIN bool bittestnone(u16sse x) { return _mm_testz_si128(*x, *x); } +KFR_SINTRIN bool bittestnone(u32sse x) { return _mm_testz_si128(*x, *x); } +KFR_SINTRIN bool bittestnone(u64sse x) { return _mm_testz_si128(*x, *x); } +KFR_SINTRIN bool bittestnone(i8sse x) { return _mm_testz_si128(*x, *x); } +KFR_SINTRIN bool bittestnone(i16sse x) { return _mm_testz_si128(*x, *x); } +KFR_SINTRIN bool bittestnone(i32sse x) { return _mm_testz_si128(*x, *x); } +KFR_SINTRIN bool bittestnone(i64sse x) { return _mm_testz_si128(*x, *x); } +KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return _mm_testc_ps(*x, *y); } +KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return _mm_testc_pd(*x, *y); } +KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return _mm_testc_si128(*x, *y); } +KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return _mm_testc_si128(*x, *y); } +KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return _mm_testc_si128(*x, *y); } +KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return _mm_testc_si128(*x, *y); } +KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return _mm_testc_si128(*x, *y); } +KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return _mm_testc_si128(*x, *y); } +KFR_SINTRIN 
bool bittestall(i32sse x, i32sse y) { return _mm_testc_si128(*x, *y); } +KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return _mm_testc_si128(*x, *y); } +KFR_SINTRIN bool bittestall(f32sse x) { return _mm_testc_ps(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(f64sse x) { return _mm_testc_pd(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(u8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(u16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(u32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(u64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(i8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(i16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(i32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(i64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } + +#if defined CID_ARCH_AVX +KFR_SINTRIN bool bittestnone(f32avx x, f32avx y) { return _mm256_testz_ps(*x, *y); } +KFR_SINTRIN bool bittestnone(f64avx x, f64avx y) { return _mm256_testz_pd(*x, *y); } +KFR_SINTRIN bool bittestnone(f32avx x) { return _mm256_testz_ps(*x, *x); } +KFR_SINTRIN bool bittestnone(f64avx x) { return _mm256_testz_pd(*x, *x); } +KFR_SINTRIN bool bittestnall(f32avx x, f32avx y) { return _mm256_testc_ps(*x, *y); } +KFR_SINTRIN bool bittestnall(f64avx x, f64avx y) { return _mm256_testc_pd(*x, *y); } +KFR_SINTRIN bool bittestnall(f32avx x) { return _mm256_testc_ps(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestnall(f64avx x) { return _mm256_testc_pd(*x, *allonesvector(x)); } +#endif - template <typename T, size_t N> - KFR_SINTRIN bool bittestnone(vec<T, N> x) - { - return !getmask(x).value; - } - template <typename T, size_t N> - KFR_SINTRIN bool bittestnone(vec<T, N> x, vec<T, N> y) - { - return bittestnone(x & 
y); - } +#if defined CID_ARCH_AVX2 +KFR_SINTRIN bool bittestnone(u8avx x, u8avx y) { return _mm256_testz_si256(*x, *y); } +KFR_SINTRIN bool bittestnone(u16avx x, u16avx y) { return _mm256_testz_si256(*x, *y); } +KFR_SINTRIN bool bittestnone(u32avx x, u32avx y) { return _mm256_testz_si256(*x, *y); } +KFR_SINTRIN bool bittestnone(u64avx x, u64avx y) { return _mm256_testz_si256(*x, *y); } +KFR_SINTRIN bool bittestnone(i8avx x, i8avx y) { return _mm256_testz_si256(*x, *y); } +KFR_SINTRIN bool bittestnone(i16avx x, i16avx y) { return _mm256_testz_si256(*x, *y); } +KFR_SINTRIN bool bittestnone(i32avx x, i32avx y) { return _mm256_testz_si256(*x, *y); } +KFR_SINTRIN bool bittestnone(i64avx x, i64avx y) { return _mm256_testz_si256(*x, *y); } + +KFR_SINTRIN bool bittestnone(u8avx x) { return _mm256_testz_si256(*x, *x); } +KFR_SINTRIN bool bittestnone(u16avx x) { return _mm256_testz_si256(*x, *x); } +KFR_SINTRIN bool bittestnone(u32avx x) { return _mm256_testz_si256(*x, *x); } +KFR_SINTRIN bool bittestnone(u64avx x) { return _mm256_testz_si256(*x, *x); } +KFR_SINTRIN bool bittestnone(i8avx x) { return _mm256_testz_si256(*x, *x); } +KFR_SINTRIN bool bittestnone(i16avx x) { return _mm256_testz_si256(*x, *x); } +KFR_SINTRIN bool bittestnone(i32avx x) { return _mm256_testz_si256(*x, *x); } +KFR_SINTRIN bool bittestnone(i64avx x) { return _mm256_testz_si256(*x, *x); } + +KFR_SINTRIN bool bittestall(u8avx x, u8avx y) { return _mm256_testc_si256(*x, *y); } +KFR_SINTRIN bool bittestall(u16avx x, u16avx y) { return _mm256_testc_si256(*x, *y); } +KFR_SINTRIN bool bittestall(u32avx x, u32avx y) { return _mm256_testc_si256(*x, *y); } +KFR_SINTRIN bool bittestall(u64avx x, u64avx y) { return _mm256_testc_si256(*x, *y); } +KFR_SINTRIN bool bittestall(i8avx x, i8avx y) { return _mm256_testc_si256(*x, *y); } +KFR_SINTRIN bool bittestall(i16avx x, i16avx y) { return _mm256_testc_si256(*x, *y); } +KFR_SINTRIN bool bittestall(i32avx x, i32avx y) { return _mm256_testc_si256(*x, *y); } 
+KFR_SINTRIN bool bittestall(i64avx x, i64avx y) { return _mm256_testc_si256(*x, *y); } + +KFR_SINTRIN bool bittestall(u8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(u16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(u32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(u64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(i8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(i16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(i32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } +KFR_SINTRIN bool bittestall(i64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } +#endif - template <typename T, size_t N> - KFR_SINTRIN bool bittestall(vec<T, N> x) - { - return !getmask(~x).value; - } - template <typename T, size_t N> - KFR_SINTRIN bool bittestall(vec<T, N> x, vec<T, N> y) +#elif defined CID_ARCH_SSE2 + +KFR_SINTRIN bool bittestnone(f32sse x) { return !_mm_movemask_ps(*x); } +KFR_SINTRIN bool bittestnone(f64sse x) { return !_mm_movemask_pd(*x); } +KFR_SINTRIN bool bittestnone(u8sse x) { return !_mm_movemask_epi8(*x); } +KFR_SINTRIN bool bittestnone(u16sse x) { return !_mm_movemask_epi8(*x); } +KFR_SINTRIN bool bittestnone(u32sse x) { return !_mm_movemask_epi8(*x); } +KFR_SINTRIN bool bittestnone(u64sse x) { return !_mm_movemask_epi8(*x); } +KFR_SINTRIN bool bittestnone(i8sse x) { return !_mm_movemask_epi8(*x); } +KFR_SINTRIN bool bittestnone(i16sse x) { return !_mm_movemask_epi8(*x); } +KFR_SINTRIN bool bittestnone(i32sse x) { return !_mm_movemask_epi8(*x); } +KFR_SINTRIN bool bittestnone(i64sse x) { return !_mm_movemask_epi8(*x); } + +KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return bittestnone(x & y); } +KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return bittestnone(x & y); } +KFR_SINTRIN 
bool bittestnone(u8sse x, u8sse y) { return bittestnone(x & y); } +KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return bittestnone(x & y); } +KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return bittestnone(x & y); } +KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return bittestnone(x & y); } +KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return bittestnone(x & y); } +KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return bittestnone(x & y); } +KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return bittestnone(x & y); } +KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return bittestnone(x & y); } + +KFR_SINTRIN bool bittestall(f32sse x) { return !_mm_movemask_ps(*~x); } +KFR_SINTRIN bool bittestall(f64sse x) { return !_mm_movemask_pd(*~x); } +KFR_SINTRIN bool bittestall(u8sse x) { return !_mm_movemask_epi8(*~x); } +KFR_SINTRIN bool bittestall(u16sse x) { return !_mm_movemask_epi8(*~x); } +KFR_SINTRIN bool bittestall(u32sse x) { return !_mm_movemask_epi8(*~x); } +KFR_SINTRIN bool bittestall(u64sse x) { return !_mm_movemask_epi8(*~x); } +KFR_SINTRIN bool bittestall(i8sse x) { return !_mm_movemask_epi8(*~x); } +KFR_SINTRIN bool bittestall(i16sse x) { return !_mm_movemask_epi8(*~x); } +KFR_SINTRIN bool bittestall(i32sse x) { return !_mm_movemask_epi8(*~x); } +KFR_SINTRIN bool bittestall(i64sse x) { return !_mm_movemask_epi8(*~x); } + +KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return bittestnone(~x & y); } +KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return bittestnone(~x & y); } +KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return bittestnone(~x & y); } +KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return bittestnone(~x & y); } +KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return bittestnone(~x & y); } +KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return bittestnone(~x & y); } +KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return bittestnone(~x & y); } +KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return 
bittestnone(~x & y); } +KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return bittestnone(~x & y); } +KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return bittestnone(~x & y); } + +#else + +template <typename T, size_t N> +KFR_SINTRIN bitmask<N> getmask(vec<T, N> x) +{ + typename bitmask<N>::type val = 0; + for (size_t i = 0; i < N; i++) { - return bittestnone(~x & y); + val |= (ubitcast(x[i]) >> (typebits<T>::bits - 1)) << i; } - - KFR_SPEC_FN(in_bittest, bittestnone) - KFR_SPEC_FN(in_bittest, bittestall) -}; - -#ifdef CID_ARCH_X86 - -template <> -struct in_bittest<cpu_t::sse2> -{ - constexpr static cpu_t cpu = cpu_t::sse2; - - KFR_SINTRIN bitmask<4> getmask(f32sse x) { return bitmask<4>(_mm_movemask_pd(*x)); } - KFR_SINTRIN bitmask<4> getmask(f64sse x) { return bitmask<4>(_mm_movemask_pd(*x)); } - KFR_SINTRIN bitmask<16> getmask(u8sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<16> getmask(u16sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<16> getmask(u32sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<16> getmask(u64sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<16> getmask(i8sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<16> getmask(i16sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<16> getmask(i32sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<16> getmask(i64sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } - - KFR_SINTRIN bool bittestnone(f32sse x) { return !_mm_movemask_ps(*x); } - KFR_SINTRIN bool bittestnone(f64sse x) { return !_mm_movemask_pd(*x); } - KFR_SINTRIN bool bittestnone(u8sse x) { return !_mm_movemask_epi8(*x); } - KFR_SINTRIN bool bittestnone(u16sse x) { return !_mm_movemask_epi8(*x); } - KFR_SINTRIN bool bittestnone(u32sse x) { return !_mm_movemask_epi8(*x); } - KFR_SINTRIN bool bittestnone(u64sse x) { return !_mm_movemask_epi8(*x); } - 
KFR_SINTRIN bool bittestnone(i8sse x) { return !_mm_movemask_epi8(*x); } - KFR_SINTRIN bool bittestnone(i16sse x) { return !_mm_movemask_epi8(*x); } - KFR_SINTRIN bool bittestnone(i32sse x) { return !_mm_movemask_epi8(*x); } - KFR_SINTRIN bool bittestnone(i64sse x) { return !_mm_movemask_epi8(*x); } - - KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return bittestnone(x & y); } - KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return bittestnone(x & y); } - KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return bittestnone(x & y); } - KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return bittestnone(x & y); } - KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return bittestnone(x & y); } - KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return bittestnone(x & y); } - KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return bittestnone(x & y); } - KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return bittestnone(x & y); } - KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return bittestnone(x & y); } - KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return bittestnone(x & y); } - - KFR_SINTRIN bool bittestall(f32sse x) { return !_mm_movemask_ps(*~x); } - KFR_SINTRIN bool bittestall(f64sse x) { return !_mm_movemask_pd(*~x); } - KFR_SINTRIN bool bittestall(u8sse x) { return !_mm_movemask_epi8(*~x); } - KFR_SINTRIN bool bittestall(u16sse x) { return !_mm_movemask_epi8(*~x); } - KFR_SINTRIN bool bittestall(u32sse x) { return !_mm_movemask_epi8(*~x); } - KFR_SINTRIN bool bittestall(u64sse x) { return !_mm_movemask_epi8(*~x); } - KFR_SINTRIN bool bittestall(i8sse x) { return !_mm_movemask_epi8(*~x); } - KFR_SINTRIN bool bittestall(i16sse x) { return !_mm_movemask_epi8(*~x); } - KFR_SINTRIN bool bittestall(i32sse x) { return !_mm_movemask_epi8(*~x); } - KFR_SINTRIN bool bittestall(i64sse x) { return !_mm_movemask_epi8(*~x); } - - KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return bittestnone(~x & y); } - KFR_SINTRIN bool bittestall(f64sse x, 
f64sse y) { return bittestnone(~x & y); } - KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return bittestnone(~x & y); } - KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return bittestnone(~x & y); } - KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return bittestnone(~x & y); } - KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return bittestnone(~x & y); } - KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return bittestnone(~x & y); } - KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return bittestnone(~x & y); } - KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return bittestnone(~x & y); } - KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return bittestnone(~x & y); } - - KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone) - KFR_HANDLE_ALL_REDUCE(logical_and, bittestall) - KFR_SPEC_FN(in_bittest, bittestnone) - KFR_SPEC_FN(in_bittest, bittestall) -}; - -template <> -struct in_bittest<cpu_t::sse41> : in_bittest<cpu_t::sse2> -{ - constexpr static cpu_t cpu = cpu_t::sse41; - - KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return _mm_testz_ps(*x, *y); } - KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return _mm_testz_pd(*x, *y); } - KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return _mm_testz_si128(*x, *y); } - KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return _mm_testz_si128(*x, *y); } - KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return _mm_testz_si128(*x, *y); } - KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return _mm_testz_si128(*x, *y); } - KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return _mm_testz_si128(*x, *y); } - KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return _mm_testz_si128(*x, *y); } - KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return _mm_testz_si128(*x, *y); } - KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return _mm_testz_si128(*x, *y); } - - KFR_SINTRIN bool bittestnone(f32sse x) { return _mm_testz_ps(*x, *x); } - KFR_SINTRIN bool bittestnone(f64sse x) { return 
_mm_testz_pd(*x, *x); } - KFR_SINTRIN bool bittestnone(u8sse x) { return _mm_testz_si128(*x, *x); } - KFR_SINTRIN bool bittestnone(u16sse x) { return _mm_testz_si128(*x, *x); } - KFR_SINTRIN bool bittestnone(u32sse x) { return _mm_testz_si128(*x, *x); } - KFR_SINTRIN bool bittestnone(u64sse x) { return _mm_testz_si128(*x, *x); } - KFR_SINTRIN bool bittestnone(i8sse x) { return _mm_testz_si128(*x, *x); } - KFR_SINTRIN bool bittestnone(i16sse x) { return _mm_testz_si128(*x, *x); } - KFR_SINTRIN bool bittestnone(i32sse x) { return _mm_testz_si128(*x, *x); } - KFR_SINTRIN bool bittestnone(i64sse x) { return _mm_testz_si128(*x, *x); } - - KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return _mm_testc_ps(*x, *y); } - KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return _mm_testc_pd(*x, *y); } - KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return _mm_testc_si128(*x, *y); } - KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return _mm_testc_si128(*x, *y); } - KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return _mm_testc_si128(*x, *y); } - KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return _mm_testc_si128(*x, *y); } - KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return _mm_testc_si128(*x, *y); } - KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return _mm_testc_si128(*x, *y); } - KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return _mm_testc_si128(*x, *y); } - KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return _mm_testc_si128(*x, *y); } - - KFR_SINTRIN bool bittestall(f32sse x) { return _mm_testc_ps(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(f64sse x) { return _mm_testc_pd(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(u8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(u16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(u32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(u64sse x) { return 
_mm_testc_si128(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(i8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(i16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(i32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(i64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } - - KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone) - KFR_HANDLE_ALL_REDUCE(logical_and, bittestall) - KFR_SPEC_FN(in_bittest, bittestnone) - KFR_SPEC_FN(in_bittest, bittestall) -}; - -template <> -struct in_bittest<cpu_t::avx1> : in_bittest<cpu_t::sse41> -{ - constexpr static cpu_t cpu = cpu_t::avx1; - using in_bittest<cpu_t::sse41>::bittestnone; - using in_bittest<cpu_t::sse41>::bittestall; - - KFR_SINTRIN bitmask<8> getmask(f32avx x) { return bitmask<8>(_mm256_movemask_pd(*x)); } - KFR_SINTRIN bitmask<8> getmask(f64avx x) { return bitmask<8>(_mm256_movemask_pd(*x)); } - - KFR_SINTRIN bool bittestnone(f32avx x, f32avx y) { return _mm256_testz_ps(*x, *y); } - KFR_SINTRIN bool bittestnone(f64avx x, f64avx y) { return _mm256_testz_pd(*x, *y); } - KFR_SINTRIN bool bittestnone(f32avx x) { return _mm256_testz_ps(*x, *x); } - KFR_SINTRIN bool bittestnone(f64avx x) { return _mm256_testz_pd(*x, *x); } - KFR_SINTRIN bool bittestnall(f32avx x, f32avx y) { return _mm256_testc_ps(*x, *y); } - KFR_SINTRIN bool bittestnall(f64avx x, f64avx y) { return _mm256_testc_pd(*x, *y); } - KFR_SINTRIN bool bittestnall(f32avx x) { return _mm256_testc_ps(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestnall(f64avx x) { return _mm256_testc_pd(*x, *allonesvector(x)); } - - KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone) - KFR_HANDLE_ALL_REDUCE(logical_and, bittestall) - KFR_SPEC_FN(in_bittest, bittestnone) - KFR_SPEC_FN(in_bittest, bittestall) -}; - -template <> -struct in_bittest<cpu_t::avx2> : in_bittest<cpu_t::avx1> -{ - constexpr static cpu_t cpu = cpu_t::avx2; - using 
in_bittest<cpu_t::avx1>::bittestnone; - using in_bittest<cpu_t::avx1>::bittestall; - - KFR_SINTRIN bitmask<32> getmask(u8avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<32> getmask(u16avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<32> getmask(u32avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<32> getmask(u64avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<32> getmask(i8avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<32> getmask(i16avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<32> getmask(i32avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } - KFR_SINTRIN bitmask<32> getmask(i64avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } - - KFR_SINTRIN bool bittestnone(u8avx x, u8avx y) { return _mm256_testz_si256(*x, *y); } - KFR_SINTRIN bool bittestnone(u16avx x, u16avx y) { return _mm256_testz_si256(*x, *y); } - KFR_SINTRIN bool bittestnone(u32avx x, u32avx y) { return _mm256_testz_si256(*x, *y); } - KFR_SINTRIN bool bittestnone(u64avx x, u64avx y) { return _mm256_testz_si256(*x, *y); } - KFR_SINTRIN bool bittestnone(i8avx x, i8avx y) { return _mm256_testz_si256(*x, *y); } - KFR_SINTRIN bool bittestnone(i16avx x, i16avx y) { return _mm256_testz_si256(*x, *y); } - KFR_SINTRIN bool bittestnone(i32avx x, i32avx y) { return _mm256_testz_si256(*x, *y); } - KFR_SINTRIN bool bittestnone(i64avx x, i64avx y) { return _mm256_testz_si256(*x, *y); } - - KFR_SINTRIN bool bittestnone(u8avx x) { return _mm256_testz_si256(*x, *x); } - KFR_SINTRIN bool bittestnone(u16avx x) { return _mm256_testz_si256(*x, *x); } - KFR_SINTRIN bool bittestnone(u32avx x) { return _mm256_testz_si256(*x, *x); } - KFR_SINTRIN bool bittestnone(u64avx x) { return _mm256_testz_si256(*x, *x); } - KFR_SINTRIN bool bittestnone(i8avx x) { return _mm256_testz_si256(*x, *x); } - KFR_SINTRIN bool bittestnone(i16avx x) { 
return _mm256_testz_si256(*x, *x); } - KFR_SINTRIN bool bittestnone(i32avx x) { return _mm256_testz_si256(*x, *x); } - KFR_SINTRIN bool bittestnone(i64avx x) { return _mm256_testz_si256(*x, *x); } - - KFR_SINTRIN bool bittestall(u8avx x, u8avx y) { return _mm256_testc_si256(*x, *y); } - KFR_SINTRIN bool bittestall(u16avx x, u16avx y) { return _mm256_testc_si256(*x, *y); } - KFR_SINTRIN bool bittestall(u32avx x, u32avx y) { return _mm256_testc_si256(*x, *y); } - KFR_SINTRIN bool bittestall(u64avx x, u64avx y) { return _mm256_testc_si256(*x, *y); } - KFR_SINTRIN bool bittestall(i8avx x, i8avx y) { return _mm256_testc_si256(*x, *y); } - KFR_SINTRIN bool bittestall(i16avx x, i16avx y) { return _mm256_testc_si256(*x, *y); } - KFR_SINTRIN bool bittestall(i32avx x, i32avx y) { return _mm256_testc_si256(*x, *y); } - KFR_SINTRIN bool bittestall(i64avx x, i64avx y) { return _mm256_testc_si256(*x, *y); } - - KFR_SINTRIN bool bittestall(u8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(u16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(u32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(u64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(i8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(i16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(i32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } - KFR_SINTRIN bool bittestall(i64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } - - KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone) - KFR_HANDLE_ALL_REDUCE(logical_and, bittestall) - KFR_SPEC_FN(in_bittest, bittestnone) - KFR_SPEC_FN(in_bittest, bittestall) -}; -#endif + return val; } -namespace native -{ -using fn_bittestnone = internal::in_bittest<>::fn_bittestnone; -template <typename T1, 
KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> bittestnone(const T1& x) +template <typename T, size_t N> +KFR_SINTRIN bool bittestnone(vec<T, N> x) { - return internal::in_bittest<>::bittestnone(x); + return !getmask(x).value; } - -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_bittestnone, E1> bittestnone(E1&& x) +template <typename T, size_t N> +KFR_SINTRIN bool bittestnone(vec<T, N> x, vec<T, N> y) { - return { fn_bittestnone(), std::forward<E1>(x) }; + return bittestnone(x & y); } -using fn_bittestall = internal::in_bittest<>::fn_bittestall; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> bittestall(const T1& x) +template <typename T, size_t N> +KFR_SINTRIN bool bittestall(vec<T, N> x) { - return internal::in_bittest<>::bittestall(x); + return !getmask(~x).value; } - -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_bittestall, E1> bittestall(E1&& x) +template <typename T, size_t N> +KFR_SINTRIN bool bittestall(vec<T, N> x, vec<T, N> y) { - return { fn_bittestall(), std::forward<E1>(x) }; + return bittestnone(~x & y); } - -using fn_bittestnone = internal::in_bittest<>::fn_bittestnone; -template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> bittestnone(const T1& x, const T2& y) -{ - return internal::in_bittest<>::bittestnone(x, y); +#endif } -template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_bittestnone, E1, E2> bittestnone(E1&& x, E2&& y) -{ - return { fn_bittestnone(), std::forward<E1>(x), std::forward<E2>(y) }; -} -using fn_bittestall = internal::in_bittest<>::fn_bittestall; -template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> bittestall(const T1& x, const T2& y) +template <typename T1, 
KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN T1 bittestnone(const T1& x) { - return internal::in_bittest<>::bittestall(x, y); + return internal::bittestnone(x); } -template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_bittestall, E1, E2> bittestall(E1&& x, E2&& y) +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN T1 bittestall(const T1& x) { - return { fn_bittestall(), std::forward<E1>(x), std::forward<E2>(y) }; -} + return internal::bittestall(x); } } diff --git a/include/kfr/base/min_max.hpp b/include/kfr/base/min_max.hpp @@ -27,383 +27,173 @@ #include "operators.hpp" #include "select.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { namespace internal { -template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu> -struct in_min_max : in_min_max<older(cpu), cc> -{ - struct fn_min : in_min_max<older(cpu), cc>::fn_min, fn_disabled - { - }; - struct fn_max : in_min_max<older(cpu), cc>::fn_max, fn_disabled - { - }; -}; - -template <cpu_t cc> -struct in_min_max<cpu_t::common, cc> : in_select<cc> -{ - constexpr static cpu_t cpu = cpu_t::common; - -private: - using in_select<cc>::select; - -public: - template <typename T> - KFR_SINTRIN T min(initialvalue<T>) - { - return std::numeric_limits<T>::max(); - } - template <typename T> - KFR_SINTRIN T max(initialvalue<T>) - { - return std::numeric_limits<T>::min(); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> min(vec<T, N> x, vec<T, N> y) - { - return select(x < y, x, y); - } - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> max(vec<T, N> x, vec<T, N> y) - { - return select(x > y, x, y); - } - - KFR_HANDLE_SCALAR(min) - KFR_HANDLE_SCALAR(max) - KFR_SPEC_FN(in_min_max, min) - KFR_SPEC_FN(in_min_max, max) -}; - -#ifdef CID_ARCH_X86 - -template <cpu_t cc> -struct in_min_max<cpu_t::sse2, cc> : in_select<cc> -{ - 
constexpr static cpu_t cpu = cpu_t::sse2; - -private: - using in_select<cc>::select; - -public: - template <typename T> - KFR_SINTRIN T min(initialvalue<T>) - { - return std::numeric_limits<T>::max(); - } - template <typename T> - KFR_SINTRIN T max(initialvalue<T>) - { - return std::numeric_limits<T>::min(); - } - - KFR_CPU_INTRIN(sse2) f32sse min(f32sse x, f32sse y) { return _mm_min_ps(*x, *y); } - KFR_CPU_INTRIN(sse2) f64sse min(f64sse x, f64sse y) { return _mm_min_pd(*x, *y); } - KFR_CPU_INTRIN(sse2) i8sse min(i8sse x, i8sse y) { return select(x < y, x, y); } - KFR_CPU_INTRIN(sse2) u16sse min(u16sse x, u16sse y) { return select(x < y, x, y); } - KFR_CPU_INTRIN(sse2) i32sse min(i32sse x, i32sse y) { return select(x < y, x, y); } - KFR_CPU_INTRIN(sse2) u32sse min(u32sse x, u32sse y) { return select(x < y, x, y); } - KFR_CPU_INTRIN(sse2) u8sse min(u8sse x, u8sse y) { return _mm_min_epu8(*x, *y); } - KFR_CPU_INTRIN(sse2) i16sse min(i16sse x, i16sse y) { return _mm_min_epi16(*x, *y); } - KFR_CPU_INTRIN(sse2) i64sse min(i64sse x, i64sse y) { return select(x < y, x, y); } - KFR_CPU_INTRIN(sse2) u64sse min(u64sse x, u64sse y) { return select(x < y, x, y); } - - KFR_CPU_INTRIN(sse2) f32sse max(f32sse x, f32sse y) { return _mm_max_ps(*x, *y); } - KFR_CPU_INTRIN(sse2) f64sse max(f64sse x, f64sse y) { return _mm_max_pd(*x, *y); } - KFR_CPU_INTRIN(sse2) i8sse max(i8sse x, i8sse y) { return select(x > y, x, y); } - KFR_CPU_INTRIN(sse2) u16sse max(u16sse x, u16sse y) { return select(x > y, x, y); } - KFR_CPU_INTRIN(sse2) i32sse max(i32sse x, i32sse y) { return select(x > y, x, y); } - KFR_CPU_INTRIN(sse2) u32sse max(u32sse x, u32sse y) { return select(x > y, x, y); } - KFR_CPU_INTRIN(sse2) u8sse max(u8sse x, u8sse y) { return _mm_max_epu8(*x, *y); } - KFR_CPU_INTRIN(sse2) i16sse max(i16sse x, i16sse y) { return _mm_max_epi16(*x, *y); } - KFR_CPU_INTRIN(sse2) i64sse max(i64sse x, i64sse y) { return select(x > y, x, y); } - KFR_CPU_INTRIN(sse2) u64sse max(u64sse x, u64sse y) { 
return select(x > y, x, y); } - - KFR_HANDLE_ALL(min) - KFR_HANDLE_ALL(max) - KFR_HANDLE_SCALAR(min) - KFR_HANDLE_SCALAR(max) - KFR_SPEC_FN(in_min_max, min) - KFR_SPEC_FN(in_min_max, max) -}; - -template <cpu_t cc> -struct in_min_max<cpu_t::sse41, cc> : in_min_max<cpu_t::sse2> -{ - constexpr static cpu_t cpu = cpu_t::sse41; - using in_min_max<cpu_t::sse2>::min; - using in_min_max<cpu_t::sse2>::max; - - KFR_CPU_INTRIN(sse41) i8sse min(i8sse x, i8sse y) { return _mm_min_epi8(*x, *y); } - KFR_CPU_INTRIN(sse41) u16sse min(u16sse x, u16sse y) { return _mm_min_epu16(*x, *y); } - KFR_CPU_INTRIN(sse41) i32sse min(i32sse x, i32sse y) { return _mm_min_epi32(*x, *y); } - KFR_CPU_INTRIN(sse41) u32sse min(u32sse x, u32sse y) { return _mm_min_epu32(*x, *y); } - - KFR_CPU_INTRIN(sse41) i8sse max(i8sse x, i8sse y) { return _mm_max_epi8(*x, *y); } - KFR_CPU_INTRIN(sse41) u16sse max(u16sse x, u16sse y) { return _mm_max_epu16(*x, *y); } - KFR_CPU_INTRIN(sse41) i32sse max(i32sse x, i32sse y) { return _mm_max_epi32(*x, *y); } - KFR_CPU_INTRIN(sse41) u32sse max(u32sse x, u32sse y) { return _mm_max_epu32(*x, *y); } - - KFR_HANDLE_ALL(min) - KFR_HANDLE_ALL(max) - KFR_HANDLE_SCALAR(min) - KFR_HANDLE_SCALAR(max) - KFR_SPEC_FN(in_min_max, min) - KFR_SPEC_FN(in_min_max, max) -}; - -template <cpu_t cc> -struct in_min_max<cpu_t::avx1, cc> : in_min_max<cpu_t::sse41> -{ - constexpr static cpu_t cpu = cpu_t::avx1; - using in_min_max<cpu_t::sse41>::min; - using in_min_max<cpu_t::sse41>::max; - - KFR_CPU_INTRIN(avx) f32avx min(f32avx x, f32avx y) { return _mm256_min_ps(*x, *y); } - KFR_CPU_INTRIN(avx) f64avx min(f64avx x, f64avx y) { return _mm256_min_pd(*x, *y); } - KFR_CPU_INTRIN(avx) f32avx max(f32avx x, f32avx y) { return _mm256_max_ps(*x, *y); } - KFR_CPU_INTRIN(avx) f64avx max(f64avx x, f64avx y) { return _mm256_max_pd(*x, *y); } - - KFR_HANDLE_ALL(min) - KFR_HANDLE_ALL(max) - KFR_HANDLE_SCALAR(min) - KFR_HANDLE_SCALAR(max) - KFR_SPEC_FN(in_min_max, min) - KFR_SPEC_FN(in_min_max, max) -}; - 
-template <cpu_t cc> -struct in_min_max<cpu_t::avx2, cc> : in_min_max<cpu_t::avx1>, in_select<cpu_t::avx2> -{ - constexpr static cpu_t cpu = cpu_t::avx2; - -private: - using in_select<cpu>::select; - -public: - using in_min_max<cpu_t::avx1>::min; - using in_min_max<cpu_t::avx1>::max; - - KFR_CPU_INTRIN(avx2) u8avx min(u8avx x, u8avx y) { return _mm256_min_epu8(*x, *y); } - KFR_CPU_INTRIN(avx2) i16avx min(i16avx x, i16avx y) { return _mm256_min_epi16(*x, *y); } - KFR_CPU_INTRIN(avx2) i8avx min(i8avx x, i8avx y) { return _mm256_min_epi8(*x, *y); } - KFR_CPU_INTRIN(avx2) u16avx min(u16avx x, u16avx y) { return _mm256_min_epu16(*x, *y); } - KFR_CPU_INTRIN(avx2) i32avx min(i32avx x, i32avx y) { return _mm256_min_epi32(*x, *y); } - KFR_CPU_INTRIN(avx2) u32avx min(u32avx x, u32avx y) { return _mm256_min_epu32(*x, *y); } - - KFR_CPU_INTRIN(avx2) u8avx max(u8avx x, u8avx y) { return _mm256_max_epu8(*x, *y); } - KFR_CPU_INTRIN(avx2) i16avx max(i16avx x, i16avx y) { return _mm256_max_epi16(*x, *y); } - KFR_CPU_INTRIN(avx2) i8avx max(i8avx x, i8avx y) { return _mm256_max_epi8(*x, *y); } - KFR_CPU_INTRIN(avx2) u16avx max(u16avx x, u16avx y) { return _mm256_max_epu16(*x, *y); } - KFR_CPU_INTRIN(avx2) i32avx max(i32avx x, i32avx y) { return _mm256_max_epi32(*x, *y); } - KFR_CPU_INTRIN(avx2) u32avx max(u32avx x, u32avx y) { return _mm256_max_epu32(*x, *y); } +#if defined CID_ARCH_SSE2 + +KFR_SINTRIN f32sse min(f32sse x, f32sse y) { return _mm_min_ps(*x, *y); } +KFR_SINTRIN f64sse min(f64sse x, f64sse y) { return _mm_min_pd(*x, *y); } +KFR_SINTRIN u8sse min(u8sse x, u8sse y) { return _mm_min_epu8(*x, *y); } +KFR_SINTRIN i16sse min(i16sse x, i16sse y) { return _mm_min_epi16(*x, *y); } +KFR_SINTRIN i64sse min(i64sse x, i64sse y) { return select(x < y, x, y); } +KFR_SINTRIN u64sse min(u64sse x, u64sse y) { return select(x < y, x, y); } + +KFR_SINTRIN f32sse max(f32sse x, f32sse y) { return _mm_max_ps(*x, *y); } +KFR_SINTRIN f64sse max(f64sse x, f64sse y) { return _mm_max_pd(*x, *y); } 
+KFR_SINTRIN u8sse max(u8sse x, u8sse y) { return _mm_max_epu8(*x, *y); } +KFR_SINTRIN i16sse max(i16sse x, i16sse y) { return _mm_max_epi16(*x, *y); } +KFR_SINTRIN i64sse max(i64sse x, i64sse y) { return select(x > y, x, y); } +KFR_SINTRIN u64sse max(u64sse x, u64sse y) { return select(x > y, x, y); } + +#if defined CID_ARCH_AVX2 +KFR_SINTRIN u8avx min(u8avx x, u8avx y) { return _mm256_min_epu8(*x, *y); } +KFR_SINTRIN i16avx min(i16avx x, i16avx y) { return _mm256_min_epi16(*x, *y); } +KFR_SINTRIN i8avx min(i8avx x, i8avx y) { return _mm256_min_epi8(*x, *y); } +KFR_SINTRIN u16avx min(u16avx x, u16avx y) { return _mm256_min_epu16(*x, *y); } +KFR_SINTRIN i32avx min(i32avx x, i32avx y) { return _mm256_min_epi32(*x, *y); } +KFR_SINTRIN u32avx min(u32avx x, u32avx y) { return _mm256_min_epu32(*x, *y); } + +KFR_SINTRIN u8avx max(u8avx x, u8avx y) { return _mm256_max_epu8(*x, *y); } +KFR_SINTRIN i16avx max(i16avx x, i16avx y) { return _mm256_max_epi16(*x, *y); } +KFR_SINTRIN i8avx max(i8avx x, i8avx y) { return _mm256_max_epi8(*x, *y); } +KFR_SINTRIN u16avx max(u16avx x, u16avx y) { return _mm256_max_epu16(*x, *y); } +KFR_SINTRIN i32avx max(i32avx x, i32avx y) { return _mm256_max_epi32(*x, *y); } +KFR_SINTRIN u32avx max(u32avx x, u32avx y) { return _mm256_max_epu32(*x, *y); } + +KFR_SINTRIN i64avx min(i64avx x, i64avx y) { return select(x < y, x, y); } +KFR_SINTRIN u64avx min(u64avx x, u64avx y) { return select(x < y, x, y); } +KFR_SINTRIN i64avx max(i64avx x, i64avx y) { return select(x > y, x, y); } +KFR_SINTRIN u64avx max(u64avx x, u64avx y) { return select(x > y, x, y); } +#endif - KFR_CPU_INTRIN(avx2) i64avx min(i64avx x, i64avx y) { return select(x < y, x, y); } - KFR_CPU_INTRIN(avx2) u64avx min(u64avx x, u64avx y) { return select(x < y, x, y); } - KFR_CPU_INTRIN(avx2) i64avx max(i64avx x, i64avx y) { return select(x > y, x, y); } - KFR_CPU_INTRIN(avx2) u64avx max(u64avx x, u64avx y) { return select(x > y, x, y); } +#if defined CID_ARCH_AVX +KFR_SINTRIN f32avx 
min(f32avx x, f32avx y) { return _mm256_min_ps(*x, *y); } +KFR_SINTRIN f64avx min(f64avx x, f64avx y) { return _mm256_min_pd(*x, *y); } +KFR_SINTRIN f32avx max(f32avx x, f32avx y) { return _mm256_max_ps(*x, *y); } +KFR_SINTRIN f64avx max(f64avx x, f64avx y) { return _mm256_max_pd(*x, *y); } +#endif - KFR_HANDLE_ALL(min) - KFR_HANDLE_ALL(max) - KFR_HANDLE_SCALAR(min) - KFR_HANDLE_SCALAR(max) - KFR_SPEC_FN(in_min_max, min) - KFR_SPEC_FN(in_min_max, max) -}; +#if defined CID_ARCH_SSE41 +KFR_SINTRIN i8sse min(i8sse x, i8sse y) { return _mm_min_epi8(*x, *y); } +KFR_SINTRIN u16sse min(u16sse x, u16sse y) { return _mm_min_epu16(*x, *y); } +KFR_SINTRIN i32sse min(i32sse x, i32sse y) { return _mm_min_epi32(*x, *y); } +KFR_SINTRIN u32sse min(u32sse x, u32sse y) { return _mm_min_epu32(*x, *y); } + +KFR_SINTRIN i8sse max(i8sse x, i8sse y) { return _mm_max_epi8(*x, *y); } +KFR_SINTRIN u16sse max(u16sse x, u16sse y) { return _mm_max_epu16(*x, *y); } +KFR_SINTRIN i32sse max(i32sse x, i32sse y) { return _mm_max_epi32(*x, *y); } +KFR_SINTRIN u32sse max(u32sse x, u32sse y) { return _mm_max_epu32(*x, *y); } +#else +KFR_SINTRIN i8sse min(i8sse x, i8sse y) { return select(x < y, x, y); } +KFR_SINTRIN u16sse min(u16sse x, u16sse y) { return select(x < y, x, y); } +KFR_SINTRIN i32sse min(i32sse x, i32sse y) { return select(x < y, x, y); } +KFR_SINTRIN u32sse min(u32sse x, u32sse y) { return select(x < y, x, y); } + +KFR_SINTRIN i8sse max(i8sse x, i8sse y) { return select(x > y, x, y); } +KFR_SINTRIN u16sse max(u16sse x, u16sse y) { return select(x > y, x, y); } +KFR_SINTRIN i32sse max(i32sse x, i32sse y) { return select(x > y, x, y); } +KFR_SINTRIN u32sse max(u32sse x, u32sse y) { return select(x > y, x, y); } #endif -template <cpu_t cpu = cpu_t::native> -struct in_minabs_maxabs -{ -public: - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> minabs(vec<T, N> x, vec<T, N> y) - { - return in_min_max<cpu>::min(in_abs<cpu>::abs(x), in_abs<cpu>::abs(y)); - } - template <typename T, 
size_t N> - KFR_SINTRIN vec<T, N> maxabs(vec<T, N> x, vec<T, N> y) - { - return in_min_max<cpu>::max(in_abs<cpu>::abs(x), in_abs<cpu>::abs(y)); - } +KFR_HANDLE_ALL_SIZES_2(min) +KFR_HANDLE_ALL_SIZES_2(max) - KFR_HANDLE_ALL(minabs) - KFR_HANDLE_ALL(maxabs) - KFR_HANDLE_SCALAR(min) - KFR_HANDLE_SCALAR(max) - KFR_SPEC_FN(in_minabs_maxabs, minabs) - KFR_SPEC_FN(in_minabs_maxabs, maxabs) -}; +#else -template <cpu_t cpu = cpu_t::native> -struct in_clamp : in_min_max<cpu> +// fallback +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> min(vec<T, N> x, vec<T, N> y) { - using in_min_max<cpu>::min; - using in_min_max<cpu>::max; - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T minimum, T maximum) - { - return clamp(x, broadcast<N>(minimum), broadcast<N>(maximum)); - } - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T minimum, vec<T, N> maximum) - { - return clamp(x, broadcast<N>(minimum), maximum); - } - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> minimum, T maximum) - { - return clamp(x, minimum, broadcast<N>(maximum)); - } - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T maximum) - { - return clamp(x, broadcast<N>(maximum)); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> minimum, vec<T, N> maximum) - { - return max(minimum, min(x, maximum)); - } - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> maximum) - { - return max(zerovector<T, N>(), min(x, maximum)); - } - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> clampm1(vec<T, N> x, vec<T, N> minimum, vec<T, N> maximum) - { - return max(minimum, min(x, maximum - T(1))); - } - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> clampm1(vec<T, N> x, vec<T, N> maximum) - { - return max(zerovector<T, N>(), min(x, maximum - T(1))); - } - KFR_HANDLE_ALL(clamp) - KFR_HANDLE_ALL(clampm1) - 
KFR_HANDLE_SCALAR(min) - KFR_HANDLE_SCALAR(max) - KFR_SPEC_FN(in_clamp, clamp) - KFR_SPEC_FN(in_clamp, clampm1) -}; + return select(x < y, x, y); } - -namespace native -{ -using fn_min = internal::in_min_max<>::fn_min; -template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> min(const T1& x, const T2& y) +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> max(vec<T, N> x, vec<T, N> y) { - return internal::in_min_max<>::min(x, y); + return select(x > y, x, y); } +#endif -template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_min, E1, E2> min(E1&& x, E2&& y) +template <typename T> +KFR_SINTRIN T min(initialvalue<T>) { - return { fn_min(), std::forward<E1>(x), std::forward<E2>(y) }; + return std::numeric_limits<T>::max(); } -using fn_max = internal::in_min_max<>::fn_max; -template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> max(const T1& x, const T2& y) +template <typename T> +KFR_SINTRIN T max(initialvalue<T>) { - return internal::in_min_max<>::max(x, y); + return std::numeric_limits<T>::min(); } - -template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_max, E1, E2> max(E1&& x, E2&& y) +template <typename T> +KFR_SINTRIN T absmin(initialvalue<T>) { - return { fn_max(), std::forward<E1>(x), std::forward<E2>(y) }; + return std::numeric_limits<T>::max(); } -using fn_minabs = internal::in_minabs_maxabs<>::fn_minabs; -template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> minabs(const T1& x, const T2& y) +template <typename T> +KFR_SINTRIN T absmax(initialvalue<T>) { - return internal::in_minabs_maxabs<>::minabs(x, y); + return 0; } -template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_minabs, E1, 
E2> minabs(E1&& x, E2&& y) -{ - return { fn_minabs(), std::forward<E1>(x), std::forward<E2>(y) }; +KFR_HANDLE_SCALAR_2(min) +KFR_FN(min) +KFR_HANDLE_SCALAR_2(max) +KFR_FN(max) +KFR_HANDLE_SCALAR_2(absmin) +KFR_FN(absmin) +KFR_HANDLE_SCALAR_2(absmax) +KFR_FN(absmax) } -using fn_maxabs = internal::in_minabs_maxabs<>::fn_maxabs; + template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> maxabs(const T1& x, const T2& y) +KFR_INTRIN common_type<T1, T2> min(const T1& x, const T2& y) { - return internal::in_minabs_maxabs<>::maxabs(x, y); + return internal::min(x, y); } template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_maxabs, E1, E2> maxabs(E1&& x, E2&& y) -{ - return { fn_maxabs(), std::forward<E1>(x), std::forward<E2>(y) }; -} -using fn_clamp = internal::in_clamp<>::fn_clamp; -template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> -KFR_INLINE ftype<common_type<T1, T2, T3>> clamp(const T1& x, const T2& l, const T3& h) +KFR_INTRIN expr_func<internal::fn_min, E1, E2> min(E1&& x, E2&& y) { - return internal::in_clamp<>::clamp(x, l, h); + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } -template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> -KFR_INLINE expr_func<fn_clamp, E1, E2, E3> clamp(E1&& x, E2&& l, E3&& h) -{ - return { fn_clamp(), std::forward<E1>(x), std::forward<E2>(l), std::forward<E3>(h) }; -} -using fn_clampm1 = internal::in_clamp<>::fn_clampm1; -template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> -KFR_INLINE ftype<common_type<T1, T2, T3>> clampm1(const T1& x, const T2& l, const T3& h) +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INTRIN common_type<T1, T2> max(const T1& x, const T2& y) { - return internal::in_clamp<>::clampm1(x, l, h); + 
return internal::max(x, y); } -template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> -KFR_INLINE expr_func<fn_clampm1, E1, E2, E3> clampm1(E1&& x, E2&& l, E3&& h) +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INTRIN expr_func<internal::fn_max, E1, E2> max(E1&& x, E2&& y) { - return { fn_clampm1(), std::forward<E1>(x), std::forward<E2>(l), std::forward<E3>(h) }; + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } -using fn_clamp = internal::in_clamp<>::fn_clamp; template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> clamp(const T1& x, const T2& h) +KFR_INTRIN common_type<T1, T2> absmin(const T1& x, const T2& y) { - return internal::in_clamp<>::clamp(x, h); + return internal::absmin(x, y); } template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_clamp, E1, E2> clamp(E1&& x, E2&& h) +KFR_INTRIN expr_func<internal::fn_absmin, E1, E2> absmin(E1&& x, E2&& y) { - return { fn_clamp(), std::forward<E1>(x), std::forward<E2>(h) }; + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } -using fn_clampm1 = internal::in_clamp<>::fn_clampm1; + template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> clampm1(const T1& x, const T2& h) +KFR_INTRIN common_type<T1, T2> absmax(const T1& x, const T2& y) { - return internal::in_clamp<>::clampm1(x, h); + return internal::absmax(x, y); } template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_clampm1, E1, E2> clampm1(E1&& x, E2&& h) +KFR_INTRIN expr_func<internal::fn_absmax, E1, E2> absmax(E1&& x, E2&& y) { - return { fn_clampm1(), std::forward<E1>(x), std::forward<E2>(h) }; -} + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } } - -#pragma clang diagnostic pop diff --git 
a/include/kfr/base/modzerobessel.hpp b/include/kfr/base/modzerobessel.hpp @@ -0,0 +1,113 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once +#include "function.hpp" +#include "log_exp.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Wc99-extensions") +#pragma clang diagnostic ignored "-Wc99-extensions" +#endif + +namespace kfr +{ + +namespace internal +{ + +template <typename T> +constexpr T bessel_coef[] = { T(0.25), + T(0.027777777777777776236), + T(0.0017361111111111110147), + T(6.9444444444444444384e-005), + T(1.9290123456790123911e-006), + T(3.9367598891408417495e-008), + T(6.1511873267825652335e-010), + T(7.5940584281266239246e-012), + T(7.5940584281266233693e-014), + T(6.2760813455591932909e-016), + T(4.3583898233049949985e-018), + T(2.5789288895295827557e-020), + T(1.3157800456783586208e-022), + T(5.8479113141260384983e-025), + T(2.2843403570804837884e-027), + T(7.904291893012054025e-030), + T(2.4395962632753252792e-032), + T(6.75788438580422547e-035), + T(1.689471096451056426e-037), + T(3.8310002187098784929e-040), + T(7.9152897080782616517e-043), + T(1.4962740468957016443e-045), + T(2.5976979980828152196e-048), + T(4.1563167969325041577e-051), + T(6.1483976285983795968e-054), + T(8.434015951438105991e-057), + T(1.0757673407446563809e-059), + T(1.2791526049282476926e-062), + T(1.4212806721424974034e-065), + T(1.4789601166935457918e-068), + T(1.4442969889585408123e-071), + T(1.3262598613026086927e-074), + T(1.1472836170437790782e-077), + T(9.3655805472961564331e-081), + T(7.2265282000741942594e-084), + T(5.2786911614858977913e-087), + T(3.6556032974279072401e-090), + T(2.4034209713529963119e-093), + T(1.5021381070956226783e-096) }; + +template <typename T, size_t N> +KFR_INLINE vec<T, N> modzerobessel(vec<T, N> x) +{ + const vec<T, N> x_2 = x * 0.5; + const vec<T, N> x_2_sqr = x_2 * x_2; + vec<T, N> num = x_2_sqr; + vec<T, N> result; + result = 1 + x_2_sqr; + + KFR_LOOP_UNROLL + for (size_t i = 0; i < (sizeof(T) == 4 ? 
20 : 39); i++) + { + result = fmadd((num *= x_2_sqr), bessel_coef<T>[i], result); + } + return result; +} + +KFR_HANDLE_SCALAR(modzerobessel) +KFR_FN(modzerobessel) +} + +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN T1 modzerobessel(const T1& x) +{ + return internal::modzerobessel(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<internal::fn_modzerobessel, E1> modzerobessel(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/operators.hpp b/include/kfr/base/operators.hpp @@ -660,4 +660,40 @@ KFR_INLINE vec<T, N> negodd(const vec<T, N>& x) { return x ^ broadcast<N>(T(), -T()); } + + +#define KFR_EXPR_UNARY(fn, op) \ + template <typename A1, KFR_ENABLE_IF(is_input_expression<A1>::value)> \ + KFR_INLINE auto operator op(A1&& a1)->decltype(bind_expression(fn(), std::forward<A1>(a1))) \ + { \ + return bind_expression(fn(), std::forward<A1>(a1)); \ + } + +#define KFR_EXPR_BINARY(fn, op) \ + template <typename A1, typename A2, KFR_ENABLE_IF(is_input_expressions<A1, A2>::value)> \ + KFR_INLINE auto operator op(A1&& a1, A2&& a2) \ + ->decltype(bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2))) \ + { \ + return bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2)); \ + } + +KFR_EXPR_UNARY(fn_neg, -) +KFR_EXPR_UNARY(fn_bitwisenot, ~) + +KFR_EXPR_BINARY(fn_add, +) +KFR_EXPR_BINARY(fn_sub, -) +KFR_EXPR_BINARY(fn_mul, *) +KFR_EXPR_BINARY(fn_div, /) +KFR_EXPR_BINARY(fn_bitwiseand, &) +KFR_EXPR_BINARY(fn_bitwiseor, |) +KFR_EXPR_BINARY(fn_bitwisexor, ^) +KFR_EXPR_BINARY(fn_shl, <<) +KFR_EXPR_BINARY(fn_shr, >>) + +KFR_EXPR_BINARY(fn_equal, ==) +KFR_EXPR_BINARY(fn_notequal, !=) +KFR_EXPR_BINARY(fn_less, <) +KFR_EXPR_BINARY(fn_greater, >) +KFR_EXPR_BINARY(fn_lessorequal, <=) +KFR_EXPR_BINARY(fn_greaterorequal, >=) } diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp @@ -28,6 +28,9 @@ namespace 
kfr { +namespace internal +{ + #define KFR_mm_trunc_ps(V) _mm_round_ps((V), _MM_FROUND_TRUNC) #define KFR_mm_roundnearest_ps(V) _mm_round_ps((V), _MM_FROUND_NINT) #define KFR_mm_trunc_pd(V) _mm_round_pd((V), _MM_FROUND_TRUNC) @@ -48,285 +51,211 @@ namespace kfr #define KFR_mm256_trunc_pd(V) _mm256_round_pd((V), _MM_FROUND_TRUNC) #define KFR_mm256_roundnearest_pd(V) _mm256_round_pd((V), _MM_FROUND_NINT) -namespace internal -{ +#if defined CID_ARCH_SSE41 -template <cpu_t c = cpu_t::native> -struct in_round : in_round<older(c)> -{ - struct fn_floor : in_round<older(c)>::fn_floor, fn_disabled - { - }; - struct fn_ceil : in_round<older(c)>::fn_ceil, fn_disabled - { - }; - struct fn_round : in_round<older(c)>::fn_round, fn_disabled - { - }; - struct fn_trunc : in_round<older(c)>::fn_trunc, fn_disabled - { - }; - struct fn_fract : in_round<older(c)>::fn_fract, fn_disabled - { - }; -}; +KFR_SINTRIN f32sse floor(f32sse value) { return _mm_floor_ps(*value); } +KFR_SINTRIN f32sse ceil(f32sse value) { return _mm_ceil_ps(*value); } +KFR_SINTRIN f32sse trunc(f32sse value) { return KFR_mm_trunc_ps(*value); } +KFR_SINTRIN f32sse round(f32sse value) { return KFR_mm_roundnearest_ps(*value); } +KFR_SINTRIN f64sse floor(f64sse value) { return _mm_floor_pd(*value); } +KFR_SINTRIN f64sse ceil(f64sse value) { return _mm_ceil_pd(*value); } +KFR_SINTRIN f64sse trunc(f64sse value) { return KFR_mm_trunc_pd(*value); } +KFR_SINTRIN f64sse round(f64sse value) { return KFR_mm_roundnearest_pd(*value); } +KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); } +KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); } -template <> -struct in_round<cpu_t::common> -{ - constexpr static cpu_t cpu = cpu_t::common; +#if defined CID_ARCH_AVX - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> floor(vec<T, N> value) - { - return value; - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> ceil(vec<T, N> value) 
- { - return value; - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> trunc(vec<T, N> value) - { - return value; - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> round(vec<T, N> value) - { - return value; - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> fract(vec<T, N>) - { - return T(); - } +KFR_SINTRIN f32avx floor(f32avx value) { return _mm256_floor_ps(*value); } +KFR_SINTRIN f32avx ceil(f32avx value) { return _mm256_ceil_ps(*value); } +KFR_SINTRIN f32avx trunc(f32avx value) { return KFR_mm256_trunc_ps(*value); } +KFR_SINTRIN f32avx round(f32avx value) { return KFR_mm256_roundnearest_ps(*value); } +KFR_SINTRIN f64avx floor(f64avx value) { return _mm256_floor_pd(*value); } +KFR_SINTRIN f64avx ceil(f64avx value) { return _mm256_ceil_pd(*value); } +KFR_SINTRIN f64avx trunc(f64avx value) { return KFR_mm256_trunc_pd(*value); } +KFR_SINTRIN f64avx round(f64avx value) { return KFR_mm256_roundnearest_pd(*value); } +KFR_SINTRIN f32avx fract(f32avx x) { return x - floor(x); } +KFR_SINTRIN f64avx fract(f64avx x) { return x - floor(x); } +#endif - template <size_t N> - KFR_SINTRIN vec<f32, N> floor(vec<f32, N> x) - { - vec<f32, N> t = cast<f32>(cast<i32>(x)); - return t - (bitcast<f32>(x < t) & 1.f); - } - template <size_t N> - KFR_SINTRIN vec<f64, N> floor(vec<f64, N> x) - { - vec<f64, N> t = cast<f64>(cast<i64>(x)); - return t - (bitcast<f64>(x < t) & 1.0); - } - template <size_t N> - KFR_SINTRIN vec<f32, N> ceil(vec<f32, N> x) - { - vec<f32, N> t = cast<f32>(cast<i32>(x)); - return t + (bitcast<f32>(x > t) & 1.f); - } - template <size_t N> - KFR_SINTRIN vec<f64, N> ceil(vec<f64, N> x) - { - vec<f64, N> t = cast<f64>(cast<i64>(x)); - return t + (bitcast<f64>(x > t) & 1.0); - } - template <size_t N> - KFR_SINTRIN vec<f32, N> round(vec<f32, N> x) - { - return cast<f32>(cast<i32>(x + mulsign(broadcast<N>(0.5f), x))); - } - 
template <size_t N> - KFR_SINTRIN vec<f64, N> round(vec<f64, N> x) - { - return cast<f64>(cast<i64>(x + mulsign(broadcast<N>(0.5), x))); - } - template <size_t N> - KFR_SINTRIN vec<f32, N> trunc(vec<f32, N> x) - { - return cast<f32>(cast<i32>(x)); - } - template <size_t N> - KFR_SINTRIN vec<f64, N> trunc(vec<f64, N> x) - { - return cast<f64>(cast<i64>(x)); - } - template <size_t N> - KFR_SINTRIN vec<f32, N> fract(vec<f32, N> x) - { - return x - floor(x); - } - template <size_t N> - KFR_SINTRIN vec<f64, N> fract(vec<f64, N> x) - { - return x - floor(x); - } +KFR_HANDLE_ALL_SIZES_1(floor) +KFR_HANDLE_ALL_SIZES_1(ceil) +KFR_HANDLE_ALL_SIZES_1(round) +KFR_HANDLE_ALL_SIZES_1(trunc) +KFR_HANDLE_ALL_SIZES_1(fract) - KFR_HANDLE_SCALAR(floor) - KFR_HANDLE_SCALAR(ceil) - KFR_HANDLE_SCALAR(round) - KFR_HANDLE_SCALAR(trunc) - KFR_HANDLE_SCALAR(fract) - KFR_SPEC_FN(in_round, floor) - KFR_SPEC_FN(in_round, ceil) - KFR_SPEC_FN(in_round, round) - KFR_SPEC_FN(in_round, trunc) - KFR_SPEC_FN(in_round, fract) -}; +#else -#ifdef CID_ARCH_X86 +// fallback -template <> -struct in_round<cpu_t::sse41> : in_round<cpu_t::common> +template <size_t N> +KFR_SINTRIN vec<f32, N> floor(vec<f32, N> x) { - constexpr static cpu_t cpu = cpu_t::sse41; - - KFR_SINTRIN f32sse floor(f32sse value) { return _mm_floor_ps(*value); } - KFR_SINTRIN f32sse ceil(f32sse value) { return _mm_ceil_ps(*value); } - KFR_SINTRIN f32sse trunc(f32sse value) { return KFR_mm_trunc_ps(*value); } - KFR_SINTRIN f32sse round(f32sse value) { return KFR_mm_roundnearest_ps(*value); } - KFR_SINTRIN f64sse floor(f64sse value) { return _mm_floor_pd(*value); } - KFR_SINTRIN f64sse ceil(f64sse value) { return _mm_ceil_pd(*value); } - KFR_SINTRIN f64sse trunc(f64sse value) { return KFR_mm_trunc_pd(*value); } - KFR_SINTRIN f64sse round(f64sse value) { return KFR_mm_roundnearest_pd(*value); } - KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); } - KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); } - - KFR_HANDLE_ALL(floor) 
- KFR_HANDLE_ALL(ceil) - KFR_HANDLE_ALL(round) - KFR_HANDLE_ALL(trunc) - KFR_HANDLE_ALL(fract) - KFR_HANDLE_SCALAR(floor) - KFR_HANDLE_SCALAR(ceil) - KFR_HANDLE_SCALAR(round) - KFR_HANDLE_SCALAR(trunc) - KFR_HANDLE_SCALAR(fract) - KFR_SPEC_FN(in_round, floor) - KFR_SPEC_FN(in_round, ceil) - KFR_SPEC_FN(in_round, round) - KFR_SPEC_FN(in_round, trunc) - KFR_SPEC_FN(in_round, fract) -}; - -template <> -struct in_round<cpu_t::avx1> : in_round<cpu_t::sse41> + vec<f32, N> t = cast<f32>(cast<i32>(x)); + return t - (bitcast<f32>(x < t) & 1.f); +} +template <size_t N> +KFR_SINTRIN vec<f64, N> floor(vec<f64, N> x) { - constexpr static cpu_t cpu = cpu_t::avx1; - using in_round<cpu_t::sse41>::floor; - using in_round<cpu_t::sse41>::ceil; - using in_round<cpu_t::sse41>::trunc; - using in_round<cpu_t::sse41>::round; - using in_round<cpu_t::sse41>::fract; - - KFR_SINTRIN f32avx floor(f32avx value) { return _mm256_floor_ps(*value); } - KFR_SINTRIN f32avx ceil(f32avx value) { return _mm256_ceil_ps(*value); } - KFR_SINTRIN f32avx trunc(f32avx value) { return KFR_mm256_trunc_ps(*value); } - KFR_SINTRIN f32avx round(f32avx value) { return KFR_mm256_roundnearest_ps(*value); } - KFR_SINTRIN f64avx floor(f64avx value) { return _mm256_floor_pd(*value); } - KFR_SINTRIN f64avx ceil(f64avx value) { return _mm256_ceil_pd(*value); } - KFR_SINTRIN f64avx trunc(f64avx value) { return KFR_mm256_trunc_pd(*value); } - KFR_SINTRIN f64avx round(f64avx value) { return KFR_mm256_roundnearest_pd(*value); } - KFR_SINTRIN f32avx fract(f32avx x) { return x - floor(x); } - KFR_SINTRIN f64avx fract(f64avx x) { return x - floor(x); } - - KFR_HANDLE_ALL(floor) - KFR_HANDLE_ALL(ceil) - KFR_HANDLE_ALL(round) - KFR_HANDLE_ALL(trunc) - KFR_HANDLE_ALL(fract) - KFR_HANDLE_SCALAR(floor) - KFR_HANDLE_SCALAR(ceil) - KFR_HANDLE_SCALAR(round) - KFR_HANDLE_SCALAR(trunc) - KFR_HANDLE_SCALAR(fract) - KFR_SPEC_FN(in_round, floor) - KFR_SPEC_FN(in_round, ceil) - KFR_SPEC_FN(in_round, round) - KFR_SPEC_FN(in_round, trunc) - 
KFR_SPEC_FN(in_round, fract) -}; + vec<f64, N> t = cast<f64>(cast<i64>(x)); + return t - (bitcast<f64>(x < t) & 1.0); +} +template <size_t N> +KFR_SINTRIN vec<f32, N> ceil(vec<f32, N> x) +{ + vec<f32, N> t = cast<f32>(cast<i32>(x)); + return t + (bitcast<f32>(x > t) & 1.f); +} +template <size_t N> +KFR_SINTRIN vec<f64, N> ceil(vec<f64, N> x) +{ + vec<f64, N> t = cast<f64>(cast<i64>(x)); + return t + (bitcast<f64>(x > t) & 1.0); +} +template <size_t N> +KFR_SINTRIN vec<f32, N> round(vec<f32, N> x) +{ + return cast<f32>(cast<i32>(x + mulsign(broadcast<N>(0.5f), x))); +} +template <size_t N> +KFR_SINTRIN vec<f64, N> round(vec<f64, N> x) +{ + return cast<f64>(cast<i64>(x + mulsign(broadcast<N>(0.5), x))); +} +template <size_t N> +KFR_SINTRIN vec<f32, N> trunc(vec<f32, N> x) +{ + return cast<f32>(cast<i32>(x)); +} +template <size_t N> +KFR_SINTRIN vec<f64, N> trunc(vec<f64, N> x) +{ + return cast<f64>(cast<i64>(x)); +} +template <size_t N> +KFR_SINTRIN vec<f32, N> fract(vec<f32, N> x) +{ + return x - floor(x); +} +template <size_t N> +KFR_SINTRIN vec<f64, N> fract(vec<f64, N> x) +{ + return x - floor(x); +} +#endif -#undef KFR_mm_trunc_ps -#undef KFR_mm_roundnearest_ps -#undef KFR_mm_trunc_pd -#undef KFR_mm_roundnearest_pd -#undef KFR_mm_trunc_ss -#undef KFR_mm_roundnearest_ss -#undef KFR_mm_trunc_sd -#undef KFR_mm_roundnearest_sd -#undef KFR_mm_floor_ss -#undef KFR_mm_floor_sd -#undef KFR_mm_ceil_ss -#undef KFR_mm_ceil_sd -#undef KFR_mm256_trunc_ps -#undef KFR_mm256_roundnearest_ps -#undef KFR_mm256_trunc_pd -#undef KFR_mm256_roundnearest_pd +// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> +// KFR_SINTRIN vec<T, N> floor(vec<T, N> value) +//{ +// return value; +//} +// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> +// KFR_SINTRIN vec<T, N> ceil(vec<T, N> value) +//{ +// return value; +//} +// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> +// KFR_SINTRIN vec<T, N> trunc(vec<T, N> value) +//{ +// 
return value; +//} +// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> +// KFR_SINTRIN vec<T, N> round(vec<T, N> value) +//{ +// return value; +//} +// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> +// KFR_SINTRIN vec<T, N> fract(vec<T, N>) +//{ +// return T(0); +//} -#endif +KFR_HANDLE_SCALAR_1(floor) +KFR_HANDLE_SCALAR_1(ceil) +KFR_HANDLE_SCALAR_1(round) +KFR_HANDLE_SCALAR_1(trunc) +KFR_HANDLE_SCALAR_1(fract) +KFR_FN(floor) +KFR_FN(ceil) +KFR_FN(round) +KFR_FN(trunc) +KFR_FN(fract) } -namespace native -{ -using fn_floor = internal::in_round<>::fn_floor; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> KFR_INTRIN ftype<T1> floor(const T1& x) { - return internal::in_round<>::floor(x); + return internal::floor(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_floor, E1> floor(E1&& x) +KFR_INTRIN expr_func<internal::fn_floor, E1> floor(E1&& x) { - return { fn_floor(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_ceil = internal::in_round<>::fn_ceil; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> KFR_INTRIN ftype<T1> ceil(const T1& x) { - return internal::in_round<>::ceil(x); + return internal::ceil(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_ceil, E1> ceil(E1&& x) +KFR_INTRIN expr_func<internal::fn_ceil, E1> ceil(E1&& x) { - return { fn_ceil(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_round = internal::in_round<>::fn_round; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> KFR_INTRIN ftype<T1> round(const T1& x) { - return internal::in_round<>::round(x); + return internal::round(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_round, E1> round(E1&& x) +KFR_INTRIN expr_func<internal::fn_round, E1> round(E1&& x) { - return { fn_round(), std::forward<E1>(x) }; + return { 
{}, std::forward<E1>(x) }; } -using fn_trunc = internal::in_round<>::fn_trunc; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> KFR_INTRIN ftype<T1> trunc(const T1& x) { - return internal::in_round<>::trunc(x); + return internal::trunc(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_trunc, E1> trunc(E1&& x) +KFR_INTRIN expr_func<internal::fn_trunc, E1> trunc(E1&& x) { - return { fn_trunc(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_fract = internal::in_round<>::fn_fract; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> KFR_INTRIN ftype<T1> fract(const T1& x) { - return internal::in_round<>::fract(x); + return internal::fract(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_fract, E1> fract(E1&& x) +KFR_INTRIN expr_func<internal::fn_fract, E1> fract(E1&& x) { - return { fn_fract(), std::forward<E1>(x) }; -} + return { {}, std::forward<E1>(x) }; } } + +#undef KFR_mm_trunc_ps +#undef KFR_mm_roundnearest_ps +#undef KFR_mm_trunc_pd +#undef KFR_mm_roundnearest_pd +#undef KFR_mm_trunc_ss +#undef KFR_mm_roundnearest_ss +#undef KFR_mm_trunc_sd +#undef KFR_mm_roundnearest_sd +#undef KFR_mm_floor_ss +#undef KFR_mm_floor_sd +#undef KFR_mm_ceil_ss +#undef KFR_mm_ceil_sd +#undef KFR_mm256_trunc_ps +#undef KFR_mm256_roundnearest_ps +#undef KFR_mm256_trunc_pd +#undef KFR_mm256_roundnearest_pd diff --git a/include/kfr/base/saturation.hpp b/include/kfr/base/saturation.hpp @@ -25,181 +25,125 @@ #include "function.hpp" #include "select.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { namespace internal { - -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_saturated : in_saturated<older(c), cc> +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b) { - struct 
fn_satadd : in_saturated<older(c), cc>::fn_satadd, fn_disabled - { - }; - struct fn_satsub : in_saturated<older(c), cc>::fn_satsub, fn_disabled - { - }; -}; - -template <cpu_t cc> -struct in_saturated<cpu_t::common, cc> : in_select<cc> + constexpr size_t shift = typebits<i32>::bits - 1; + const vec<T, N> sum = a + b; + a = (a >> shift) + allonesvector(a); + + return select(((a ^ b) | ~(b ^ sum)) >= 0, a, sum); +} +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> saturated_signed_sub(vec<T, N> a, vec<T, N> b) { - constexpr static cpu_t cpu = cpu_t::common; - - template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)> - KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b) - { - return saturated_signed_add(a, b); - } - template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)> - KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b) - { - return saturated_unsigned_add(a, b); - } - - template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)> - KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b) - { - return saturated_signed_sub(a, b); - } - template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)> - KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b) - { - return saturated_unsigned_sub(a, b); - } - KFR_SPEC_FN(in_saturated, satadd) - KFR_SPEC_FN(in_saturated, satsub) - -protected: - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b) - { - constexpr size_t shift = typebits<i32>::bits - 1; - const vec<T, N> sum = a + b; - a = (a >> shift) + allonesvector(a); - - return select(((a ^ b) | ~(b ^ sum)) >= 0, a, sum); - } - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> saturated_signed_sub(vec<T, N> a, vec<T, N> b) - { - constexpr size_t shift = typebits<i32>::bits - 1; - const vec<T, N> diff = a - b; - a = (a >> shift) + allonesvector(a); - - return select(((a ^ b) & (a ^ diff)) < 0, a, diff); - } - template <typename T, size_t N> - 
KFR_SINTRIN vec<T, N> saturated_unsigned_add(vec<T, N> a, vec<T, N> b) - { - constexpr vec<T, N> t = allonesvector(a); - return select(a > t - b, t, a + b); - } - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> saturated_unsigned_sub(vec<T, N> a, vec<T, N> b) - { - return select(a < b, zerovector(a), a - b); - } -}; - -#ifdef CID_ARCH_X86 - -template <cpu_t cc> -struct in_saturated<cpu_t::sse2, cc> : in_saturated<cpu_t::common>, in_select<cc> + constexpr size_t shift = typebits<i32>::bits - 1; + const vec<T, N> diff = a - b; + a = (a >> shift) + allonesvector(a); + + return select(((a ^ b) & (a ^ diff)) < 0, a, diff); +} +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> saturated_unsigned_add(vec<T, N> a, vec<T, N> b) { - constexpr static cpu_t cpu = cpu_t::sse2; - -private: - using in_select<cc>::select; - -public: - KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); } - KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); } - KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); } - KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); } - - KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); } - KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); } - KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); } - KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); } - - KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); } - KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); } - KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); } - KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); } - - KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); } - KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return 
saturated_signed_sub(a, b); } - KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); } - KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); } - - KFR_HANDLE_ALL(satadd) - KFR_HANDLE_ALL(satsub) - KFR_HANDLE_SCALAR(satadd) - KFR_HANDLE_SCALAR(satsub) - KFR_SPEC_FN(in_saturated, satadd) - KFR_SPEC_FN(in_saturated, satsub) -}; - -template <cpu_t cc> -struct in_saturated<cpu_t::avx2, cc> : in_saturated<cpu_t::sse2, cc> + const vec<T, N> t = allonesvector(a); + return select(a > t - b, t, a + b); +} +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> saturated_unsigned_sub(vec<T, N> a, vec<T, N> b) { - constexpr static cpu_t cpu = cpu_t::avx2; - using in_saturated<cpu_t::sse2, cc>::satadd; - using in_saturated<cpu_t::sse2, cc>::satsub; - - KFR_SINTRIN u8avx satadd(u8avx x, u8avx y) { return _mm256_adds_epu8(*x, *y); } - KFR_SINTRIN i8avx satadd(i8avx x, i8avx y) { return _mm256_adds_epi8(*x, *y); } - KFR_SINTRIN u16avx satadd(u16avx x, u16avx y) { return _mm256_adds_epu16(*x, *y); } - KFR_SINTRIN i16avx satadd(i16avx x, i16avx y) { return _mm256_adds_epi16(*x, *y); } - - KFR_SINTRIN u8avx satsub(u8avx x, u8avx y) { return _mm256_subs_epu8(*x, *y); } - KFR_SINTRIN i8avx satsub(i8avx x, i8avx y) { return _mm256_subs_epi8(*x, *y); } - KFR_SINTRIN u16avx satsub(u16avx x, u16avx y) { return _mm256_subs_epu16(*x, *y); } - KFR_SINTRIN i16avx satsub(i16avx x, i16avx y) { return _mm256_subs_epi16(*x, *y); } - - KFR_HANDLE_ALL(satadd) - KFR_HANDLE_ALL(satsub) - KFR_HANDLE_SCALAR(satadd) - KFR_HANDLE_SCALAR(satsub) - KFR_SPEC_FN(in_saturated, satadd) - KFR_SPEC_FN(in_saturated, satsub) -}; + return select(a < b, zerovector(a), a - b); +} + +#if defined CID_ARCH_SSE2 + +KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); } +KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); } +KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); } 
+KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); } + +KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); } +KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); } +KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); } +KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); } + +KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); } +KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); } +KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); } +KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); } + +KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); } +KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); } +KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); } +KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); } + +#if defined CID_ARCH_AVX2 +KFR_SINTRIN u8avx satadd(u8avx x, u8avx y) { return _mm256_adds_epu8(*x, *y); } +KFR_SINTRIN i8avx satadd(i8avx x, i8avx y) { return _mm256_adds_epi8(*x, *y); } +KFR_SINTRIN u16avx satadd(u16avx x, u16avx y) { return _mm256_adds_epu16(*x, *y); } +KFR_SINTRIN i16avx satadd(i16avx x, i16avx y) { return _mm256_adds_epi16(*x, *y); } + +KFR_SINTRIN u8avx satsub(u8avx x, u8avx y) { return _mm256_subs_epu8(*x, *y); } +KFR_SINTRIN i8avx satsub(i8avx x, i8avx y) { return _mm256_subs_epi8(*x, *y); } +KFR_SINTRIN u16avx satsub(u16avx x, u16avx y) { return _mm256_subs_epu16(*x, *y); } +KFR_SINTRIN i16avx satsub(i16avx x, i16avx y) { return _mm256_subs_epi16(*x, *y); } #endif + +#else +// fallback +template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)> +KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b) +{ + return saturated_signed_add(a, b); } 
-namespace native +template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)> +KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b) { -using fn_satadd = internal::in_saturated<>::fn_satadd; + return saturated_unsigned_add(a, b); +} +template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)> +KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b) +{ + return saturated_signed_sub(a, b); +} +template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)> +KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b) +{ + return saturated_unsigned_sub(a, b); +} +#endif +KFR_HANDLE_SCALAR_1(satadd) +KFR_FN(satadd) +KFR_HANDLE_SCALAR_1(satsub) +KFR_FN(satsub) +} + template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> satadd(const T1& x, const T2& y) +KFR_INTRIN common_type<T1, T2> satadd(const T1& x, const T2& y) { - return internal::in_saturated<>::satadd(x, y); + return internal::satadd(x, y); } template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_satadd, E1, E2> satadd(E1&& x, E2&& y) +KFR_INTRIN expr_func<internal::fn_satadd, E1, E2> satadd(E1&& x, E2&& y) { - return { fn_satadd(), std::forward<E1>(x), std::forward<E2>(y) }; + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } -using fn_satsub = internal::in_saturated<>::fn_satsub; + template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> -KFR_INLINE ftype<common_type<T1, T2>> satsub(const T1& x, const T2& y) +KFR_INTRIN common_type<T1, T2> satsub(const T1& x, const T2& y) { - return internal::in_saturated<>::satsub(x, y); + return internal::satsub(x, y); } template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE expr_func<fn_satsub, E1, E2> satsub(E1&& x, E2&& y) +KFR_INTRIN expr_func<internal::fn_satsub, E1, E2> satsub(E1&& x, E2&& y) { - return { fn_satsub(), 
std::forward<E1>(x), std::forward<E2>(y) }; + return { {}, std::forward<E1>(x), std::forward<E2>(y) }; } } -} - -#pragma clang diagnostic pop diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp @@ -29,174 +29,71 @@ namespace kfr namespace internal { -template <cpu_t c> -struct in_select_impl : in_select_impl<older(c)> -{ - struct fn_select : fn_disabled - { - }; -}; - -template <> -struct in_select_impl<cpu_t::common> -{ - constexpr static cpu_t cur = cpu_t::common; - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> select(vec<T, N> m, vec<T, N> x, vec<T, N> y) - { - return y ^ ((x ^ y) & m); - } - KFR_SPEC_FN(in_select_impl, select) -}; +#if defined CID_ARCH_SSE41 + +KFR_SINTRIN u8sse select(mu8sse m, u8sse x, u8sse y) { return _mm_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN u16sse select(mu16sse m, u16sse x, u16sse y) { return _mm_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN u32sse select(mu32sse m, u32sse x, u32sse y) { return _mm_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN u64sse select(mu64sse m, u64sse x, u64sse y) { return _mm_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN i8sse select(mi8sse m, i8sse x, i8sse y) { return _mm_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN i16sse select(mi16sse m, i16sse x, i16sse y) { return _mm_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN i32sse select(mi32sse m, i32sse x, i32sse y) { return _mm_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN i64sse select(mi64sse m, i64sse x, i64sse y) { return _mm_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN f32sse select(mf32sse m, f32sse x, f32sse y) { return _mm_blendv_ps(*y, *x, *m); } +KFR_SINTRIN f64sse select(mf64sse m, f64sse x, f64sse y) { return _mm_blendv_pd(*y, *x, *m); } + +#if defined CID_ARCH_AVX +KFR_SINTRIN f64avx select(mf64avx m, f64avx x, f64avx y) { return _mm256_blendv_pd(*y, *x, *m); } +KFR_SINTRIN f32avx select(mf32avx m, f32avx x, f32avx y) { return _mm256_blendv_ps(*y, *x, *m); } +#endif -#ifdef CID_ARCH_X86 +#if defined CID_ARCH_AVX2 +KFR_SINTRIN u8avx select(mu8avx m, u8avx x, 
u8avx y) { return _mm256_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN u16avx select(mu16avx m, u16avx x, u16avx y) { return _mm256_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN u32avx select(mu32avx m, u32avx x, u32avx y) { return _mm256_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN u64avx select(mu64avx m, u64avx x, u64avx y) { return _mm256_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN i8avx select(mi8avx m, i8avx x, i8avx y) { return _mm256_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN i16avx select(mi16avx m, i16avx x, i16avx y) { return _mm256_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN i32avx select(mi32avx m, i32avx x, i32avx y) { return _mm256_blendv_epi8(*y, *x, *m); } +KFR_SINTRIN i64avx select(mi64avx m, i64avx x, i64avx y) { return _mm256_blendv_epi8(*y, *x, *m); } +#endif -template <> -struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::common> +template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> +KFR_SINTRIN vec<T, N> select(mask<T, N> a, vec<T, N> b, vec<T, N> c) { - constexpr static cpu_t cpu = cpu_t::sse41; - - KFR_CPU_INTRIN(sse41) u8sse select(u8sse m, u8sse x, u8sse y) { return _mm_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(sse41) u16sse select(u16sse m, u16sse x, u16sse y) { return _mm_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(sse41) u32sse select(u32sse m, u32sse x, u32sse y) { return _mm_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(sse41) u64sse select(u64sse m, u64sse x, u64sse y) { return _mm_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(sse41) i8sse select(i8sse m, i8sse x, i8sse y) { return _mm_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(sse41) i16sse select(i16sse m, i16sse x, i16sse y) { return _mm_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(sse41) i32sse select(i32sse m, i32sse x, i32sse y) { return _mm_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(sse41) i64sse select(i64sse m, i64sse x, i64sse y) { return _mm_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(sse41) f32sse select(f32sse m, f32sse x, f32sse y) { return _mm_blendv_ps(*y, 
*x, *m); } - KFR_CPU_INTRIN(sse41) f64sse select(f64sse m, f64sse x, f64sse y) { return _mm_blendv_pd(*y, *x, *m); } - - KFR_HANDLE_ALL(select) - KFR_SPEC_FN(in_select_impl, select) -}; - -template <> -struct in_select_impl<cpu_t::avx1> : in_select_impl<cpu_t::sse41> + return slice<0, N>(select(expand_simd(a).asmask(), expand_simd(b), expand_simd(c))); +} +template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> +KFR_SINTRIN vec<T, N> select(mask<T, N> a, vec<T, N> b, vec<T, N> c) { - constexpr static cpu_t cpu = cpu_t::avx1; - using in_select_impl<cpu_t::sse41>::select; - - KFR_CPU_INTRIN(avx) f64avx select(f64avx m, f64avx x, f64avx y) { return _mm256_blendv_pd(*y, *x, *m); } - KFR_CPU_INTRIN(avx) f32avx select(f32avx m, f32avx x, f32avx y) { return _mm256_blendv_ps(*y, *x, *m); } + return concat(select(low(a).asmask(), low(b), low(c)), select(high(a).asmask(), high(b), high(c))); +} - KFR_HANDLE_ALL(select) - KFR_SPEC_FN(in_select_impl, select) -}; +#else -template <> -struct in_select_impl<cpu_t::avx2> : in_select_impl<cpu_t::avx1> +// fallback +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> select(mask<T, N> m, vec<T, N> x, vec<T, N> y) { - constexpr static cpu_t cpu = cpu_t::avx2; - using in_select_impl<cpu_t::avx1>::select; - - KFR_CPU_INTRIN(avx2) u8avx select(u8avx m, u8avx x, u8avx y) { return _mm256_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(avx2) u16avx select(u16avx m, u16avx x, u16avx y) - { - return _mm256_blendv_epi8(*y, *x, *m); - } - KFR_CPU_INTRIN(avx2) u32avx select(u32avx m, u32avx x, u32avx y) - { - return _mm256_blendv_epi8(*y, *x, *m); - } - KFR_CPU_INTRIN(avx2) u64avx select(u64avx m, u64avx x, u64avx y) - { - return _mm256_blendv_epi8(*y, *x, *m); - } - KFR_CPU_INTRIN(avx2) i8avx select(i8avx m, i8avx x, i8avx y) { return _mm256_blendv_epi8(*y, *x, *m); } - KFR_CPU_INTRIN(avx2) i16avx select(i16avx m, i16avx x, i16avx y) - { - return _mm256_blendv_epi8(*y, *x, *m); - } - 
KFR_CPU_INTRIN(avx2) i32avx select(i32avx m, i32avx x, i32avx y) - { - return _mm256_blendv_epi8(*y, *x, *m); - } - KFR_CPU_INTRIN(avx2) i64avx select(i64avx m, i64avx x, i64avx y) - { - return _mm256_blendv_epi8(*y, *x, *m); - } - - KFR_HANDLE_ALL(select) - KFR_SPEC_FN(in_select_impl, select) -}; - + return y ^ ((x ^ y) & m); +} #endif -template <cpu_t c = cpu_t::native> -struct in_select : in_select_impl<c> -{ - using in_select_impl<c>::select; - - template <typename T, size_t N, typename M> - KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, vec<T, N> y) - { - static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); - return in_select_impl<c>::select(bitcast<T>(m), x, y); - } - template <typename T, size_t N, typename M> - KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, mask<T, N> y) - { - static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); - return in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), ref_cast<vec<T, N>>(y)); - } - - template <typename T, size_t N, typename M> - KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, T y) - { - static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); - return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), broadcast<N>(y)); - } - - template <typename T, size_t N, typename M> - KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, T y) - { - static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); - return in_select_impl<c>::select(bitcast<T>(m), x, broadcast<N>(y)); - } - - template <typename T, size_t N, typename M> - KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, vec<T, N> y) - { - static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); - return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), y); - } - template <typename T, size_t N, typename M> - KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, T y) - { - static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); - return 
in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), broadcast<N>(y)); - } - - template <typename T, size_t N, typename M> - KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, mask<T, N> y) - { - static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); - return in_select_impl<c>::select(m, broadcast<N>(x), ref_cast<vec<T, N>>(y)); - } - KFR_SPEC_FN(in_select, select) - - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> sign(vec<T, N> x) - { - return select(x > T(), T(1), select(x < T(), T(-1), T(0))); - } -}; +KFR_I_FN(select) } -namespace native +template <typename T1, size_t N, typename T2, typename T3, + KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value), + typename Tout = subtype<common_type<T2, T3>>> +KFR_INTRIN vec<Tout, N> select(const mask<T1, N>& m, const T2& x, const T3& y) { -using fn_select = internal::in_select<>::fn_select; -template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> -KFR_INLINE ftype<common_type<T2, T3>> select(const T1& arg1, const T2& arg2, const T3& arg3) -{ - return internal::in_select<>::select(arg1, arg2, arg3); + static_assert(sizeof(T1) == sizeof(Tout), "select: incompatible types"); + return internal::select(bitcast<Tout>(m).asmask(), static_cast<vec<Tout, N>>(x), static_cast<vec<Tout, N>>(y)); } + template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> -KFR_INLINE expr_func<fn_select, E1, E2, E3> select(E1&& arg1, E2&& arg2, E3&& arg3) +KFR_INTRIN expr_func<internal::fn_select, E1, E2, E3> select(E1&& m, E2&& x, E3&& y) { - return { fn_select(), std::forward<E1>(arg1), std::forward<E2>(arg2), std::forward<E3>(arg3) }; -} + return { {}, std::forward<E1>(m), std::forward<E2>(x), std::forward<E3>(y) }; } } diff --git a/include/kfr/base/sin_cos.hpp b/include/kfr/base/sin_cos.hpp @@ -31,10 +31,6 @@ #include "select.hpp" #include "shuffle.hpp" -#pragma clang diagnostic push -#if 
CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif #if CID_HAS_WARNING("-Wc99-extensions") #pragma clang diagnostic ignored "-Wc99-extensions" #endif @@ -45,517 +41,402 @@ namespace kfr namespace internal { -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_trig : in_select<cc> -{ -private: - using in_select<cc>::select; - -protected: - template <typename T, size_t N> - KFR_SINTRIN vec<T, N> mask_horner(vec<T, N>, mask<T, N> msk, T a0, T b0) - { - return select(msk, a0, b0); - } - - template <typename T, size_t N, typename... Ts> - KFR_SINTRIN vec<T, N> mask_horner(vec<T, N> x, mask<T, N> msk, T a0, T b0, T a1, T b1, Ts... values) - { - return fmadd(mask_horner(x, msk, a1, b1, values...), x, select(msk, a0, b0)); - } -}; - -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_sin_cos : private in_trig<cc>, private in_select<cc>, private in_round<cc>, private in_abs<cc> -{ - -private: - using in_abs<cc>::abs; - using in_round<cc>::floor; - using in_select<cc>::select; - using in_trig<cc>::mask_horner; - - template <typename T, size_t N, typename Tprecise = f64> - KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x, vec<itype<T>, N>& quadrant) - { - const vec<T, N> xabs = abs(x); - constexpr vec<T, N> div = fold_constant_div<T>; - vec<T, N> y = floor(xabs / div); - quadrant = cast<itype<T>>(y - floor(y * T(1.0 / 16.0)) * T(16.0)); - - const mask<T, N> msk = bitcast<T>((quadrant & 1) != 0); - quadrant = select(msk, quadrant + 1, quadrant); - y = select(msk, y + T(1.0), y); - quadrant = quadrant & 7; - - constexpr vec<Tprecise, N> hi = cast<Tprecise>(fold_constant_hi<T>); - constexpr vec<T, N> rem1 = fold_constant_rem1<T>; - constexpr vec<T, N> rem2 = fold_constant_rem2<T>; - return cast<T>(cast<Tprecise>(xabs) - cast<Tprecise>(y) * hi) - y * rem1 - y * rem2; - } - - template <size_t N> - KFR_SINTRIN vec<f32, N> trig_sincos(vec<f32, N> folded, mask<f32, N> cosmask) - { - constexpr f32 sin_c2 = 
-0x2.aaaaacp-4f; - constexpr f32 sin_c4 = 0x2.222334p-8f; - constexpr f32 sin_c6 = -0xd.0566ep-16f; - constexpr f32 sin_c8 = 0x3.64cc1cp-20f; - constexpr f32 sin_c10 = -0x5.6c4a4p-24f; - constexpr f32 cos_c2 = -0x8.p-4f; - constexpr f32 cos_c4 = 0xa.aaaabp-8f; - constexpr f32 cos_c6 = -0x5.b05d48p-12f; - constexpr f32 cos_c8 = 0x1.a065f8p-16f; - constexpr f32 cos_c10 = -0x4.cd156p-24f; - - const vec<f32, N> x2 = folded * folded; - - vec<f32, N> formula = mask_horner(x2, cosmask, 1.0f, 1.0f, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, - sin_c6, cos_c8, sin_c8, cos_c10, sin_c10); - - formula = select(cosmask, formula, formula * folded); - return formula; - } - - template <size_t N> - KFR_SINTRIN vec<f64, N> trig_sincos(vec<f64, N> folded, mask<f64, N> cosmask) - { - constexpr f64 sin_c2 = -0x2.aaaaaaaaaaaaap-4; - constexpr f64 sin_c4 = 0x2.22222222220cep-8; - constexpr f64 sin_c6 = -0xd.00d00cffd6618p-16; - constexpr f64 sin_c8 = 0x2.e3bc744fb879ep-20; - constexpr f64 sin_c10 = -0x6.b99034c1467a4p-28; - constexpr f64 sin_c12 = 0xb.0711ea8fe8ee8p-36; - constexpr f64 sin_c14 = -0xb.7e010897e55dp-44; - constexpr f64 sin_c16 = -0xb.64eac07f1d6bp-48; - constexpr f64 cos_c2 = -0x8.p-4; - constexpr f64 cos_c4 = 0xa.aaaaaaaaaaaa8p-8; - constexpr f64 cos_c6 = -0x5.b05b05b05ad28p-12; - constexpr f64 cos_c8 = 0x1.a01a01a0022e6p-16; - constexpr f64 cos_c10 = -0x4.9f93ed845de2cp-24; - constexpr f64 cos_c12 = 0x8.f76bc015abe48p-32; - constexpr f64 cos_c14 = -0xc.9bf2dbe00379p-40; - constexpr f64 cos_c16 = 0xd.1232ac32f7258p-48; - - vec<f64, N> x2 = folded * folded; - vec<f64, N> formula = - mask_horner(x2, cosmask, 1.0, 1.0, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, sin_c6, cos_c8, sin_c8, - cos_c10, sin_c10, cos_c12, sin_c12, cos_c14, sin_c14, cos_c16, sin_c16); - - formula = select(cosmask, formula, formula * folded); - return formula; - } - - template <typename T, size_t N, typename = u8[N > 1]> - KFR_SINTRIN vec<T, N> sincos_mask(vec<T, N> x_full, mask<T, N> cosmask) - { - 
vec<itype<T>, N> quadrant; - vec<T, N> folded = trig_fold(x_full, quadrant); - - mask<T, N> flip_sign = select(cosmask, (quadrant == 2) || (quadrant == 4), quadrant >= 4); - - mask<T, N> usecos = (quadrant == 2) || (quadrant == 6); - usecos = usecos ^ cosmask; - - vec<T, N> formula = trig_sincos(folded, usecos); - - mask<T, N> negmask = x_full < 0; - - flip_sign = flip_sign ^ (negmask & ~cosmask); - - formula = select(flip_sign, -formula, formula); - return formula; - } - - template <typename T> - constexpr static T fold_constant_div = choose_const<T>(0x1.921fb6p-1f, 0x1.921fb54442d18p-1); - - template <typename T> - constexpr static T fold_constant_hi = choose_const<T>(0x1.922000p-1f, 0x1.921fb40000000p-1); - template <typename T> - constexpr static T fold_constant_rem1 = choose_const<T>(-0x1.2ae000p-19f, 0x1.4442d00000000p-25); - template <typename T> - constexpr static T fold_constant_rem2 = choose_const<T>(-0x1.de973ep-32f, 0x1.8469898cc5170p-49); - constexpr static cpu_t cur = c; - -public: - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> sin(vec<T, N> x) - { - vec<itype<T>, N> quadrant; - vec<T, N> folded = trig_fold(x, quadrant); - - mask<T, N> flip_sign = quadrant >= 4; - mask<T, N> usecos = (quadrant == 2) || (quadrant == 6); - - vec<T, N> formula = trig_sincos(folded, usecos); - - formula = select(flip_sign ^ x.asmask(), -formula, formula); - return formula; - } - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> cos(vec<T, N> x) - { - vec<itype<T>, N> quadrant; - vec<T, N> folded = trig_fold(x, quadrant); - - mask<T, N> eq4 = (quadrant == 4); - mask<T, N> flip_sign = (quadrant == 2) || eq4; - mask<T, N> usecos = (quadrant == 0) || eq4; - - vec<T, N> formula = trig_sincos(folded, usecos); - - formula = select(flip_sign, -formula, formula); - return formula; - } - - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> 
fastsin(vec<T, N> x) - { - constexpr vec<T, N> msk = broadcast<N>(highbitmask<T>); - - constexpr static T c2 = -0.16665853559970855712890625; - constexpr static T c4 = +8.31427983939647674560546875e-3; - constexpr static T c6 = -1.85423981747590005397796630859375e-4; - - const vec<T, N> pi = c_pi<T>; - - x -= pi; - vec<T, N> y = abs(x); - y = select(y > c_pi<T, 1, 2>, pi - y, y); - y = y ^ (msk & ~x); - - vec<T, N> y2 = y * y; - vec<T, N> formula = c6; - vec<T, N> y3 = y2 * y; - formula = fmadd(formula, y2, c4); - formula = fmadd(formula, y2, c2); - formula = formula * y3 + y; - return formula; - } - - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> fastcos(vec<T, N> x) - { - x += c_pi<T, 1, 2>; - x = select(x >= c_pi<T, 2>, x - c_pi<T, 2>, x); - return fastsin(x); - } - template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> sincos(vec<T, N> x) - { - return sincos_mask(x, internal::oddmask<T, N>()); - } - - template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> cossin(vec<T, N> x) - { - return sincos_mask(x, internal::evenmask<T, N>()); - } - - template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> - KFR_SINTRIN vec<T, N> sinc(vec<T, N> x) - { - return select(abs(x) <= c_epsilon<T>, T(1), sin(x) / x); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> sin(vec<T, N> x) - { - return sin(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> cos(vec<T, N> x) - { - return cos(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> fastsin(vec<T, N> x) - { - return fastsin(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), 
typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> fastcos(vec<T, N> x) - { - return fastcos(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> sincos(vec<T, N> x) - { - return sincos(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> cossin(vec<T, N> x) - { - return cossin(cast<Tout>(x)); - } - template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> - KFR_SINTRIN vec<Tout, N> sinc(vec<T, N> x) - { - return sinc(cast<Tout>(x)); - } - - template <typename T> - KFR_SINTRIN T sindeg(const T& x) - { - return sin(x * c_degtorad<T>); - } - template <typename T> - KFR_SINTRIN T cosdeg(const T& x) - { - return cos(x * c_degtorad<T>); - } - - template <typename T> - KFR_SINTRIN T fastsindeg(const T& x) - { - return fastsin(x * c_degtorad<T>); - } - template <typename T> - KFR_SINTRIN T fastcosdeg(const T& x) - { - return fastcos(x * c_degtorad<T>); - } - - template <typename T> - KFR_SINTRIN T sincosdeg(const T& x) - { - return sincos(x * c_degtorad<T>); - } - template <typename T> - KFR_SINTRIN T cossindeg(const T& x) - { - return cossin(x * c_degtorad<T>); - } - - KFR_HANDLE_SCALAR(sin) - KFR_HANDLE_SCALAR(cos) - KFR_HANDLE_SCALAR(fastsin) - KFR_HANDLE_SCALAR(fastcos) - KFR_HANDLE_SCALAR(sincos) - KFR_HANDLE_SCALAR(cossin) - KFR_HANDLE_SCALAR(sinc) - - KFR_SPEC_FN(in_sin_cos, sin) - KFR_SPEC_FN(in_sin_cos, cos) - KFR_SPEC_FN(in_sin_cos, fastsin) - KFR_SPEC_FN(in_sin_cos, fastcos) - KFR_SPEC_FN(in_sin_cos, sincos_mask) - KFR_SPEC_FN(in_sin_cos, sincos) - KFR_SPEC_FN(in_sin_cos, cossin) - KFR_SPEC_FN(in_sin_cos, sinc) - KFR_SPEC_FN(in_sin_cos, sindeg) - KFR_SPEC_FN(in_sin_cos, cosdeg) - KFR_SPEC_FN(in_sin_cos, fastsindeg) - KFR_SPEC_FN(in_sin_cos, fastcosdeg) - KFR_SPEC_FN(in_sin_cos, sincosdeg) - KFR_SPEC_FN(in_sin_cos, cossindeg) -}; -} - 
-namespace native -{ -using fn_sin = internal::in_sin_cos<>::fn_sin; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sin(const T1& x) +template <typename T> +constexpr static T fold_constant_div = choose_const<T>(0x1.921fb6p-1f, 0x1.921fb54442d18p-1); + +template <typename T> +constexpr static T fold_constant_hi = choose_const<T>(0x1.922000p-1f, 0x1.921fb40000000p-1); +template <typename T> +constexpr static T fold_constant_rem1 = choose_const<T>(-0x1.2ae000p-19f, 0x1.4442d00000000p-25); +template <typename T> +constexpr static T fold_constant_rem2 = choose_const<T>(-0x1.de973ep-32f, 0x1.8469898cc5170p-49); + +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> trig_horner(vec<T, N>, mask<T, N> msk, T a0, T b0) { - return internal::in_sin_cos<>::sin(x); + return select(msk, a0, b0); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sin, E1> sin(E1&& x) +template <typename T, size_t N, typename... Ts> +KFR_SINTRIN vec<T, N> trig_horner(vec<T, N> x, mask<T, N> msk, T a0, T b0, T a1, T b1, Ts... 
values) { - return { fn_sin(), std::forward<E1>(x) }; + return fmadd(trig_horner(x, msk, a1, b1, values...), x, select(msk, a0, b0)); } -using fn_cos = internal::in_sin_cos<>::fn_cos; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cos(const T1& x) +template <typename T, size_t N, typename Tprecise = f64> +KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x, vec<itype<T>, N>& quadrant) { - return internal::in_sin_cos<>::cos(x); + const vec<T, N> xabs = abs(x); + constexpr vec<T, N> div = fold_constant_div<T>; + vec<T, N> y = floor(xabs / div); + quadrant = cast<itype<T>>(y - floor(y * T(1.0 / 16.0)) * T(16.0)); + + const mask<T, N> msk = bitcast<T>((quadrant & 1) != 0); + quadrant = kfr::select(msk, quadrant + 1, quadrant); + y = select(msk, y + T(1.0), y); + quadrant = quadrant & 7; + + constexpr vec<Tprecise, N> hi = cast<Tprecise>(fold_constant_hi<T>); + constexpr vec<T, N> rem1 = fold_constant_rem1<T>; + constexpr vec<T, N> rem2 = fold_constant_rem2<T>; + return cast<T>(cast<Tprecise>(xabs) - cast<Tprecise>(y) * hi) - y * rem1 - y * rem2; } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cos, E1> cos(E1&& x) +template <size_t N> +KFR_SINTRIN vec<f32, N> trig_sincos(vec<f32, N> folded, mask<f32, N> cosmask) { - return { fn_cos(), std::forward<E1>(x) }; + constexpr f32 sin_c2 = -0x2.aaaaacp-4f; + constexpr f32 sin_c4 = 0x2.222334p-8f; + constexpr f32 sin_c6 = -0xd.0566ep-16f; + constexpr f32 sin_c8 = 0x3.64cc1cp-20f; + constexpr f32 sin_c10 = -0x5.6c4a4p-24f; + constexpr f32 cos_c2 = -0x8.p-4f; + constexpr f32 cos_c4 = 0xa.aaaabp-8f; + constexpr f32 cos_c6 = -0x5.b05d48p-12f; + constexpr f32 cos_c8 = 0x1.a065f8p-16f; + constexpr f32 cos_c10 = -0x4.cd156p-24f; + + const vec<f32, N> x2 = folded * folded; + + vec<f32, N> formula = trig_horner(x2, cosmask, 1.0f, 1.0f, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, sin_c6, + cos_c8, sin_c8, cos_c10, sin_c10); + + formula = select(cosmask, formula, 
formula * folded); + return formula; } -using fn_fastsin = internal::in_sin_cos<>::fn_fastsin; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> fastsin(const T1& x) + +template <size_t N> +KFR_SINTRIN vec<f64, N> trig_sincos(vec<f64, N> folded, mask<f64, N> cosmask) { - return internal::in_sin_cos<>::fastsin(x); + constexpr f64 sin_c2 = -0x2.aaaaaaaaaaaaap-4; + constexpr f64 sin_c4 = 0x2.22222222220cep-8; + constexpr f64 sin_c6 = -0xd.00d00cffd6618p-16; + constexpr f64 sin_c8 = 0x2.e3bc744fb879ep-20; + constexpr f64 sin_c10 = -0x6.b99034c1467a4p-28; + constexpr f64 sin_c12 = 0xb.0711ea8fe8ee8p-36; + constexpr f64 sin_c14 = -0xb.7e010897e55dp-44; + constexpr f64 sin_c16 = -0xb.64eac07f1d6bp-48; + constexpr f64 cos_c2 = -0x8.p-4; + constexpr f64 cos_c4 = 0xa.aaaaaaaaaaaa8p-8; + constexpr f64 cos_c6 = -0x5.b05b05b05ad28p-12; + constexpr f64 cos_c8 = 0x1.a01a01a0022e6p-16; + constexpr f64 cos_c10 = -0x4.9f93ed845de2cp-24; + constexpr f64 cos_c12 = 0x8.f76bc015abe48p-32; + constexpr f64 cos_c14 = -0xc.9bf2dbe00379p-40; + constexpr f64 cos_c16 = 0xd.1232ac32f7258p-48; + + vec<f64, N> x2 = folded * folded; + vec<f64, N> formula = + trig_horner(x2, cosmask, 1.0, 1.0, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, sin_c6, cos_c8, sin_c8, + cos_c10, sin_c10, cos_c12, sin_c12, cos_c14, sin_c14, cos_c16, sin_c16); + + formula = select(cosmask, formula, formula * folded); + return formula; } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_fastsin, E1> fastsin(E1&& x) +template <typename T, size_t N, typename = u8[N > 1]> +KFR_SINTRIN vec<T, N> sincos_mask(vec<T, N> x_full, mask<T, N> cosmask) { - return { fn_fastsin(), std::forward<E1>(x) }; + vec<itype<T>, N> quadrant; + vec<T, N> folded = trig_fold(x_full, quadrant); + + mask<T, N> flip_sign = select(cosmask, (quadrant == 2) || (quadrant == 4), quadrant >= 4); + + mask<T, N> usecos = (quadrant == 2) || (quadrant == 6); + usecos = usecos ^ cosmask; + + 
vec<T, N> formula = trig_sincos(folded, usecos); + + mask<T, N> negmask = x_full < 0; + + flip_sign = flip_sign ^ (negmask & ~cosmask); + + formula = select(flip_sign, -formula, formula); + return formula; } -using fn_fastcos = internal::in_sin_cos<>::fn_fastcos; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> fastcos(const T1& x) +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> sin(vec<T, N> x) { - return internal::in_sin_cos<>::fastcos(x); + vec<itype<T>, N> quadrant; + vec<T, N> folded = trig_fold(x, quadrant); + + mask<T, N> flip_sign = quadrant >= 4; + mask<T, N> usecos = (quadrant == 2) || (quadrant == 6); + + vec<T, N> formula = trig_sincos(folded, usecos); + + formula = select(flip_sign ^ x.asmask(), -formula, formula); + return formula; } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_fastcos, E1> fastcos(E1&& x) +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> cos(vec<T, N> x) { - return { fn_fastcos(), std::forward<E1>(x) }; + vec<itype<T>, N> quadrant; + vec<T, N> folded = trig_fold(x, quadrant); + + mask<T, N> eq4 = (quadrant == 4); + mask<T, N> flip_sign = (quadrant == 2) || eq4; + mask<T, N> usecos = (quadrant == 0) || eq4; + + vec<T, N> formula = trig_sincos(folded, usecos); + + formula = select(flip_sign, -formula, formula); + return formula; } -using fn_sincos_mask = internal::in_sin_cos<>::fn_sincos_mask; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sincos_mask(const T1& x) +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> fastsin(vec<T, N> x) +{ + constexpr vec<T, N> msk = broadcast<N>(internal::highbitmask<T>); + + constexpr static T c2 = -0.16665853559970855712890625; + constexpr static T c4 = +8.31427983939647674560546875e-3; + constexpr static T c6 = -1.85423981747590005397796630859375e-4; + 
+ const vec<T, N> pi = c_pi<T>; + + x -= pi; + vec<T, N> y = abs(x); + y = select(y > c_pi<T, 1, 2>, pi - y, y); + y = y ^ (msk & ~x); + + vec<T, N> y2 = y * y; + vec<T, N> formula = c6; + vec<T, N> y3 = y2 * y; + formula = fmadd(formula, y2, c4); + formula = fmadd(formula, y2, c2); + formula = formula * y3 + y; + return formula; +} + +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> fastcos(vec<T, N> x) { - return internal::in_sin_cos<>::sincos_mask(x); + x += c_pi<T, 1, 2>; + x = select(x >= c_pi<T, 2>, x - c_pi<T, 2>, x); + return fastsin(x); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sincos_mask, E1> sincos_mask(E1&& x) +template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> sincos(vec<T, N> x) { - return { fn_sincos_mask(), std::forward<E1>(x) }; + return sincos_mask(x, internal::oddmask<T, N>()); } -using fn_sincos = internal::in_sin_cos<>::fn_sincos; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sincos(const T1& x) +template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> cossin(vec<T, N> x) { - return internal::in_sin_cos<>::sincos(x); + return sincos_mask(x, internal::evenmask<T, N>()); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sincos, E1> sincos(E1&& x) +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> sinc(vec<T, N> x) { - return { fn_sincos(), std::forward<E1>(x) }; + return select(abs(x) <= c_epsilon<T>, T(1), sin(x) / x); } -using fn_cossin = internal::in_sin_cos<>::fn_cossin; -template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cossin(const T1& x) +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> sin(vec<T, N> x) { - return 
internal::in_sin_cos<>::cossin(x); + return sin(cast<Tout>(x)); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cossin, E1> cossin(E1&& x) +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> cos(vec<T, N> x) +{ + return cos(cast<Tout>(x)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> fastsin(vec<T, N> x) { - return { fn_cossin(), std::forward<E1>(x) }; + return fastsin(cast<Tout>(x)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> fastcos(vec<T, N> x) +{ + return fastcos(cast<Tout>(x)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> sincos(vec<T, N> x) +{ + return sincos(cast<Tout>(x)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> cossin(vec<T, N> x) +{ + return cossin(cast<Tout>(x)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> +KFR_SINTRIN vec<Tout, N> sinc(vec<T, N> x) +{ + return sinc(cast<Tout>(x)); +} + +template <typename T> +KFR_SINTRIN T sindeg(const T& x) +{ + return sin(x * c_degtorad<T>); +} + +template <typename T> +KFR_SINTRIN T cosdeg(const T& x) +{ + return cos(x * c_degtorad<T>); +} + +template <typename T> +KFR_SINTRIN T fastsindeg(const T& x) +{ + return fastsin(x * c_degtorad<T>); +} + +template <typename T> +KFR_SINTRIN T fastcosdeg(const T& x) +{ + return fastcos(x * c_degtorad<T>); +} + +template <typename T> +KFR_SINTRIN T sincosdeg(const T& x) +{ + return sincos(x * c_degtorad<T>); +} + +template <typename T> +KFR_SINTRIN T cossindeg(const T& x) +{ + return cossin(x * c_degtorad<T>); +} + +KFR_HANDLE_SCALAR_1(sin) +KFR_HANDLE_SCALAR_1(cos) 
+KFR_HANDLE_SCALAR_1(fastsin) +KFR_HANDLE_SCALAR_1(fastcos) +KFR_HANDLE_SCALAR_1(sincos) +KFR_HANDLE_SCALAR_1(cossin) +KFR_HANDLE_SCALAR_1(sinc) + +KFR_FN(sin) +KFR_FN(cos) +KFR_FN(fastsin) +KFR_FN(fastcos) +KFR_FN(sincos) +KFR_FN(cossin) +KFR_FN(sinc) } -using fn_sindeg = internal::in_sin_cos<>::fn_sindeg; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sindeg(const T1& x) +KFR_INTRIN ftype<T1> sin(const T1& x) { - return internal::in_sin_cos<>::sindeg(x); + return internal::sin(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sindeg, E1> sindeg(E1&& x) +KFR_INTRIN expr_func<internal::fn_sin, E1> sin(E1&& x) { - return { fn_sindeg(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_cosdeg = internal::in_sin_cos<>::fn_cosdeg; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cosdeg(const T1& x) +KFR_INTRIN ftype<T1> cos(const T1& x) { - return internal::in_sin_cos<>::cosdeg(x); + return internal::cos(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cosdeg, E1> cosdeg(E1&& x) +KFR_INTRIN expr_func<internal::fn_cos, E1> cos(E1&& x) { - return { fn_cosdeg(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_fastsindeg = internal::in_sin_cos<>::fn_fastsindeg; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> fastsindeg(const T1& x) +KFR_INTRIN ftype<T1> fastsin(const T1& x) { - return internal::in_sin_cos<>::fastsindeg(x); + return internal::fastsin(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_fastsindeg, E1> fastsindeg(E1&& x) +KFR_INTRIN expr_func<internal::fn_fastsin, E1> fastsin(E1&& x) { - return { fn_fastsindeg(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_fastcosdeg = internal::in_sin_cos<>::fn_fastcosdeg; template <typename T1, 
KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> fastcosdeg(const T1& x) +KFR_INTRIN ftype<T1> fastcos(const T1& x) { - return internal::in_sin_cos<>::fastcosdeg(x); + return internal::fastcos(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_fastcosdeg, E1> fastcosdeg(E1&& x) +KFR_INTRIN expr_func<internal::fn_fastcos, E1> fastcos(E1&& x) { - return { fn_fastcosdeg(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_sincosdeg = internal::in_sin_cos<>::fn_sincosdeg; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sincosdeg(const T1& x) +KFR_INTRIN ftype<T1> sincos(const T1& x) { - return internal::in_sin_cos<>::sincosdeg(x); + return internal::sincos(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sincosdeg, E1> sincosdeg(E1&& x) +KFR_INTRIN expr_func<internal::fn_sincos, E1> sincos(E1&& x) { - return { fn_sincosdeg(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_cossindeg = internal::in_sin_cos<>::fn_cossindeg; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> cossindeg(const T1& x) +KFR_INTRIN ftype<T1> cossin(const T1& x) { - return internal::in_sin_cos<>::cossindeg(x); + return internal::cossin(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_cossindeg, E1> cossindeg(E1&& x) +KFR_INTRIN expr_func<internal::fn_cossin, E1> cossin(E1&& x) { - return { fn_cossindeg(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_sinc = internal::in_sin_cos<>::fn_sinc; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> KFR_INTRIN ftype<T1> sinc(const T1& x) { - return internal::in_sin_cos<>::sinc(x); + return internal::sinc(x); } + template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sinc, E1> sinc(E1&& x) +KFR_INTRIN 
expr_func<internal::fn_sinc, E1> sinc(E1&& x) { return { {}, std::forward<E1>(x) }; } template <typename T> -inline T sin2x(const T& sinx, const T& cosx) +KFR_SINTRIN T sin2x(const T& sinx, const T& cosx) { return 2 * sinx * cosx; } + template <typename T> -inline T sin3x(const T& sinx, const T& cosx) +KFR_SINTRIN T sin3x(const T& sinx, const T& cosx) { return sinx * (-1 + 4 * sqr(cosx)); } template <typename T> -inline T cos2x(const T& sinx, const T& cosx) +KFR_SINTRIN T cos2x(const T& sinx, const T& cosx) { return sqr(cosx) - sqr(sinx); } + template <typename T> -inline T cos3x(const T& sinx, const T& cosx) +KFR_SINTRIN T cos3x(const T& sinx, const T& cosx) { return cosx * (1 - 4 * sqr(sinx)); } } -} - -#pragma clang diagnostic pop diff --git a/include/kfr/base/sqrt.hpp b/include/kfr/base/sqrt.hpp @@ -30,77 +30,42 @@ namespace kfr namespace internal { -template <cpu_t c = cpu_t::native> -struct in_sqrt : in_sqrt<older(c)> -{ - struct fn_sqrt : fn_disabled - { - }; -}; - -template <> -struct in_sqrt<cpu_t::common> -{ - constexpr static cpu_t cpu = cpu_t::common; - - template <size_t N> - KFR_SINTRIN vec<f32, N> sqrt(vec<f32, N> x) - { - return apply([](float xx) { return std::sqrt(xx); }, x); - } - template <size_t N> - KFR_SINTRIN vec<f64, N> sqrt(vec<f64, N> x) - { - return apply([](double xx) { return std::sqrt(xx); }, x); - } +#if defined CID_ARCH_SSE2 - KFR_HANDLE_SCALAR(sqrt) - KFR_SPEC_FN(in_sqrt, sqrt) -}; +KFR_SINTRIN f32x1 sqrt(f32x1 x) { return slice<0, 1>(tovec(_mm_sqrt_ss(*extend<4>(x)))); } +KFR_SINTRIN f64x1 sqrt(f64x1 x) { return slice<0, 1>(tovec(_mm_sqrt_sd(_mm_setzero_pd(), *extend<2>(x)))); } +KFR_SINTRIN f32sse sqrt(f32sse x) { return _mm_sqrt_ps(*x); } +KFR_SINTRIN f64sse sqrt(f64sse x) { return _mm_sqrt_pd(*x); } -#ifdef CID_ARCH_X86 - -template <> -struct in_sqrt<cpu_t::sse2> -{ - constexpr static cpu_t cpu = cpu_t::sse2; +#if defined CID_ARCH_AVX +KFR_SINTRIN f32avx sqrt(f32avx x) { return _mm256_sqrt_ps(*x); } +KFR_SINTRIN f64avx 
sqrt(f64avx x) { return _mm256_sqrt_pd(*x); } +#endif - KFR_SINTRIN f32sse sqrt(f32sse x) { return _mm_sqrt_ps(*x); } - KFR_SINTRIN f64sse sqrt(f64sse x) { return _mm_sqrt_pd(*x); } +KFR_HANDLE_ALL_SIZES_1(sqrt) - KFR_HANDLE_ALL(sqrt) - KFR_HANDLE_SCALAR(sqrt) - KFR_SPEC_FN(in_sqrt, sqrt) -}; +#else -template <> -struct in_sqrt<cpu_t::avx1> : in_sqrt<cpu_t::sse2> +// fallback +template <typename T, size_t N> +KFR_SINTRIN vec<T, N> sqrt(vec<T, N> x) { - constexpr static cpu_t cpu = cpu_t::avx1; - using in_sqrt<cpu_t::sse2>::sqrt; - - KFR_SINTRIN f32avx KFR_USE_CPU(avx) sqrt(f32avx x) { return _mm256_sqrt_ps(*x); } - KFR_SINTRIN f64avx KFR_USE_CPU(avx) sqrt(f64avx x) { return _mm256_sqrt_pd(*x); } - - KFR_HANDLE_ALL(sqrt) - KFR_HANDLE_SCALAR(sqrt) - KFR_SPEC_FN(in_sqrt, sqrt) -}; + return apply([](T x) { return std::sqrt(x); }, x); +} #endif +KFR_HANDLE_SCALAR_1(sqrt) +KFR_FN(sqrt) } -namespace native -{ -using fn_sqrt = internal::in_sqrt<>::fn_sqrt; + template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> KFR_INTRIN ftype<T1> sqrt(const T1& x) { - return internal::in_sqrt<>::sqrt(x); + return internal::sqrt(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sqrt, E1> sqrt(E1&& x) +KFR_INTRIN expr_func<internal::fn_sqrt, E1> sqrt(E1&& x) { - return { fn_sqrt(), std::forward<E1>(x) }; -} + return { {}, std::forward<E1>(x) }; } } diff --git a/include/kfr/base/tan.hpp b/include/kfr/base/tan.hpp @@ -28,156 +28,129 @@ #include "select.hpp" #include "sin_cos.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif -#if CID_HAS_WARNING("-Wc99-extensions") -#pragma clang diagnostic ignored "-Wc99-extensions" -#endif - namespace kfr { namespace internal { -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_tan : in_trig<cc>, in_select<cc>, in_round<cc>, in_abs<cc> +template <typename T, size_t N, typename IT = itype<T>> 
+KFR_SINTRIN vec<T, N> trig_fold_simple(vec<T, N> x_full, mask<T, N>& inverse) +{ + constexpr T pi_14 = c_pi<T, 1, 4>; + + vec<T, N> y = abs(x_full); + vec<T, N> scaled = y / pi_14; + + vec<T, N> k_real = floor(scaled); + vec<IT, N> k = cast<IT>(k_real); + + vec<T, N> x = y - k_real * pi_14; + + mask<T, N> need_offset = (k & 1) != 0; + x = select(need_offset, x - pi_14, x); + + vec<IT, N> k_mod4 = k & 3; + inverse = (k_mod4 == 1) || (k_mod4 == 2); + return x; +} + +template <size_t N> +KFR_SINTRIN vec<f32, N> tan(vec<f32, N> x_full) { -private: - using in_abs<cc>::abs; - using in_round<cc>::floor; - using in_select<cc>::select; - using in_trig<cc>::mask_horner; - - template <typename T, size_t N, typename IT = itype<T>> - KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x_full, mask<T, N>& inverse) - { - constexpr T pi_14 = c_pi<T, 1, 4>; - - vec<T, N> y = abs(x_full); - vec<T, N> scaled = y / pi_14; - - vec<T, N> k_real = floor(scaled); - vec<IT, N> k = cast<IT>(k_real); - - vec<T, N> x = y - k_real * pi_14; - - mask<T, N> need_offset = (k & 1) != 0; - x = select(need_offset, x - pi_14, x); - - vec<IT, N> k_mod4 = k & 3; - inverse = (k_mod4 == 1) || (k_mod4 == 2); - return x; - } - -public: - template <size_t N> - KFR_SINTRIN vec<f32, N> tan(vec<f32, N> x_full) - { - mask<f32, N> inverse; - const vec<f32, N> x = trig_fold(x_full, inverse); - - constexpr f32 tan_c2 = 0x5.555378p-4; - constexpr f32 tan_c4 = 0x2.225bb8p-4; - constexpr f32 tan_c6 = 0xd.ac3fep-8; - constexpr f32 tan_c8 = 0x6.41644p-8; - constexpr f32 tan_c10 = 0xc.bfe7ep-12; - constexpr f32 tan_c12 = 0x2.6754dp-8; - - constexpr f32 cot_c2 = -0x5.555558p-4; - constexpr f32 cot_c4 = -0x5.b0581p-8; - constexpr f32 cot_c6 = -0x8.ac5ccp-12; - constexpr f32 cot_c8 = -0xd.aaa01p-16; - constexpr f32 cot_c10 = -0x1.a9a9b4p-16; - constexpr f32 cot_c12 = -0x6.f7d4dp-24; - - const vec<f32, N> x2 = x * x; - const vec<f32, N> val = mask_horner(x2, inverse, 1.0f, 1.0f, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, - tan_c6, 
cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12); - - const vec<f32, N> z = select(inverse, val / -x, val * x); - return mulsign(z, x_full); - } - - template <size_t N> - KFR_SINTRIN vec<f64, N> tan(vec<f64, N> x_full) - { - mask<f64, N> inverse; - const vec<f64, N> x = trig_fold(x_full, inverse); - - constexpr f64 tan_c2 = 0x5.5555554d8e5b8p-4; - constexpr f64 tan_c4 = 0x2.222224820264p-4; - constexpr f64 tan_c6 = 0xd.d0d90de32b3e8p-8; - constexpr f64 tan_c8 = 0x5.99723bdcf5cacp-8; - constexpr f64 tan_c10 = 0x2.434a142e413ap-8; - constexpr f64 tan_c12 = 0xf.2b59061305efp-12; - constexpr f64 tan_c14 = 0x4.a12565071a664p-12; - constexpr f64 tan_c16 = 0x4.dada3797ac1bcp-12; - constexpr f64 tan_c18 = -0x1.a74976b6ea3f3p-12; - constexpr f64 tan_c20 = 0x1.d06a5ae5e4a74p-12; - - constexpr f64 cot_c2 = -0x5.5555555555554p-4; - constexpr f64 cot_c4 = -0x5.b05b05b05b758p-8; - constexpr f64 cot_c6 = -0x8.ab355dffc79a8p-12; - constexpr f64 cot_c8 = -0xd.debbca405c9f8p-16; - constexpr f64 cot_c10 = -0x1.66a8edb99b15p-16; - constexpr f64 cot_c12 = -0x2.450239be0ee92p-20; - constexpr f64 cot_c14 = -0x3.ad6ddb4719438p-24; - constexpr f64 cot_c16 = -0x5.ff4c42741356p-28; - constexpr f64 cot_c18 = -0x9.06881bcdf3108p-32; - constexpr f64 cot_c20 = -0x1.644abedc113cap-32; - - const vec<f64, N> x2 = x * x; - const vec<f64, N> val = - mask_horner(x2, inverse, 1.0, 1.0, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, tan_c6, cot_c8, tan_c8, - cot_c10, tan_c10, cot_c12, tan_c12, cot_c14, tan_c14, cot_c16, tan_c16, cot_c18, - tan_c18, cot_c20, tan_c20); - - const vec<f64, N> z = select(inverse, val / -x, val * x); - return mulsign(z, x_full); - } - template <typename T> - KFR_SINTRIN T tandeg(const T& x) - { - return tan(x * c_degtorad<T>); - } - - KFR_HANDLE_SCALAR(tan) - KFR_SPEC_FN(in_tan, tan) - KFR_SPEC_FN(in_tan, tandeg) -}; + mask<f32, N> inverse; + const vec<f32, N> x = trig_fold_simple(x_full, inverse); + + constexpr f32 tan_c2 = 0x5.555378p-4; + constexpr f32 tan_c4 = 0x2.225bb8p-4; 
+ constexpr f32 tan_c6 = 0xd.ac3fep-8; + constexpr f32 tan_c8 = 0x6.41644p-8; + constexpr f32 tan_c10 = 0xc.bfe7ep-12; + constexpr f32 tan_c12 = 0x2.6754dp-8; + + constexpr f32 cot_c2 = -0x5.555558p-4; + constexpr f32 cot_c4 = -0x5.b0581p-8; + constexpr f32 cot_c6 = -0x8.ac5ccp-12; + constexpr f32 cot_c8 = -0xd.aaa01p-16; + constexpr f32 cot_c10 = -0x1.a9a9b4p-16; + constexpr f32 cot_c12 = -0x6.f7d4dp-24; + + const vec<f32, N> x2 = x * x; + const vec<f32, N> val = trig_horner(x2, inverse, 1.0f, 1.0f, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, + tan_c6, cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12); + + const vec<f32, N> z = select(inverse, val / -x, val * x); + return mulsign(z, x_full); } -namespace native +template <size_t N> +KFR_SINTRIN vec<f64, N> tan(vec<f64, N> x_full) { -using fn_tan = internal::in_tan<>::fn_tan; + mask<f64, N> inverse; + const vec<f64, N> x = trig_fold_simple(x_full, inverse); + + constexpr f64 tan_c2 = 0x5.5555554d8e5b8p-4; + constexpr f64 tan_c4 = 0x2.222224820264p-4; + constexpr f64 tan_c6 = 0xd.d0d90de32b3e8p-8; + constexpr f64 tan_c8 = 0x5.99723bdcf5cacp-8; + constexpr f64 tan_c10 = 0x2.434a142e413ap-8; + constexpr f64 tan_c12 = 0xf.2b59061305efp-12; + constexpr f64 tan_c14 = 0x4.a12565071a664p-12; + constexpr f64 tan_c16 = 0x4.dada3797ac1bcp-12; + constexpr f64 tan_c18 = -0x1.a74976b6ea3f3p-12; + constexpr f64 tan_c20 = 0x1.d06a5ae5e4a74p-12; + + constexpr f64 cot_c2 = -0x5.5555555555554p-4; + constexpr f64 cot_c4 = -0x5.b05b05b05b758p-8; + constexpr f64 cot_c6 = -0x8.ab355dffc79a8p-12; + constexpr f64 cot_c8 = -0xd.debbca405c9f8p-16; + constexpr f64 cot_c10 = -0x1.66a8edb99b15p-16; + constexpr f64 cot_c12 = -0x2.450239be0ee92p-20; + constexpr f64 cot_c14 = -0x3.ad6ddb4719438p-24; + constexpr f64 cot_c16 = -0x5.ff4c42741356p-28; + constexpr f64 cot_c18 = -0x9.06881bcdf3108p-32; + constexpr f64 cot_c20 = -0x1.644abedc113cap-32; + + const vec<f64, N> x2 = x * x; + const vec<f64, N> val = trig_horner(x2, inverse, 1.0, 1.0, cot_c2, 
tan_c2, cot_c4, tan_c4, cot_c6, tan_c6, + cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12, cot_c14, tan_c14, + cot_c16, tan_c16, cot_c18, tan_c18, cot_c20, tan_c20); + + const vec<f64, N> z = select(inverse, val / -x, val * x); + return mulsign(z, x_full); +} +template <typename T> +KFR_SINTRIN T tandeg(const T& x) +{ + return tan(x * c_degtorad<T>); +} + +KFR_HANDLE_SCALAR(tan) +KFR_FN(tan) +KFR_FN(tandeg) +} + template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> tan(const T1& x) +KFR_INTRIN T1 tan(const T1& x) { - return internal::in_tan<>::tan(x); + return internal::tan(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_tan, E1> tan(E1&& x) +KFR_INTRIN expr_func<internal::fn_tan, E1> tan(E1&& x) { - return { fn_tan(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } -using fn_tandeg = internal::in_tan<>::fn_tandeg; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> tandeg(const T1& x) +KFR_INTRIN T1 tandeg(const T1& x) { - return internal::in_tan<>::tandeg(x); + return internal::tandeg(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_tandeg, E1> tandeg(E1&& x) +KFR_INTRIN expr_func<internal::fn_tandeg, E1> tandeg(E1&& x) { - return { fn_tandeg(), std::forward<E1>(x) }; + return { {}, std::forward<E1>(x) }; } } -} - -#pragma clang diagnostic pop diff --git a/include/kfr/dft/conv.hpp b/include/kfr/dft/conv.hpp @@ -27,7 +27,6 @@ #include "../base/memory.hpp" #include "../base/read_write.hpp" #include "../base/vec.hpp" -#include "../expressions/operators.hpp" #include "fft.hpp" diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -215,8 +215,8 @@ KFR_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t size) else { double kth = c_pi<double, 2> * (n / static_cast<double>(size)); - double tcos = +kfr::native::cos(kth); - double tsin = -kfr::native::sin(kth); + double tcos = 
+kfr::cos(kth); + double tsin = -kfr::sin(kth); return make_vector(static_cast<T>(tcos), static_cast<T>(tsin)); } } diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp @@ -485,7 +485,7 @@ constexpr cvec<T, N> twiddleimagmask() template <typename T, size_t N> KFR_NOINLINE static vec<T, N> cossin_conj(vec<T, N> x) { - return cconj(in_sin_cos<cpu_t::native>::cossin(x)); + return cconj(cossin(x)); } template <size_t k, size_t size, bool inverse = false, typename T, size_t width> diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp @@ -26,7 +26,6 @@ #include "../base/sin_cos.hpp" #include "../base/vec.hpp" #include "../expressions/basic.hpp" -#include "../expressions/operators.hpp" #include "../expressions/reduce.hpp" #include "window.hpp" @@ -38,90 +37,72 @@ using fir_taps = univector<T, Size>; namespace internal { -template <cpu_t cpu = cpu_t::native> -struct in_fir : in_reduce<cpu> +template <size_t tapcount, typename T, typename E1> +struct expression_short_fir : expression<E1> { -private: - using in_reduce<cpu>::dotproduct; + static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two"); -public: - template <size_t tapcount, typename T, typename E1> - struct expression_short_fir : expression<E1> + expression_short_fir(E1&& e1, const array_ref<T>& taps) + : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0) { - static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two"); - template <cpu_t newcpu> - using retarget_this = - typename in_fir<newcpu>::template expression_short_fir<tapcount, T, retarget<E1, newcpu>>; - - expression_short_fir(E1&& e1, const array_ref<T>& taps) - : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const - { - vec<T, N> in = cast<T>(this->argument_first(index, x)); + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, 
vec_t<U, N> x) const + { + vec<T, N> in = cast<T>(this->argument_first(index, x)); - vec<T, N> out = in * taps[0]; - cfor(csize<1>, csize<tapcount>, - [&](auto I) { out = out + concat_and_slice<tapcount - 1 - I, N>(delayline, in) * taps[I]; }); - delayline = concat_and_slice<N, tapcount - 1>(delayline, in); + vec<T, N> out = in * taps[0]; + cfor(csize<1>, csize<tapcount>, + [&](auto I) { out = out + concat_and_slice<tapcount - 1 - I, N>(delayline, in) * taps[I]; }); + delayline = concat_and_slice<N, tapcount - 1>(delayline, in); - return cast<U>(out); - } - vec<T, tapcount> taps; - mutable vec<T, tapcount - 1> delayline; - }; + return cast<U>(out); + } + vec<T, tapcount> taps; + mutable vec<T, tapcount - 1> delayline; +}; - template <typename T, typename E1> - struct expression_fir : expression<E1> +template <typename T, typename E1> +struct expression_fir : expression<E1> +{ + expression_fir(E1&& e1, const array_ref<const T>& taps) + : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(taps.size(), T()), delayline_cursor(0) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const { - template <cpu_t newcpu> - using retarget_this = typename in_fir<newcpu>::template expression_fir<T, retarget<E1, newcpu>>; + const size_t tapcount = taps.size(); + const vec<T, N> input = cast<T>(this->argument_first(index, x)); - expression_fir(E1&& e1, const array_ref<const T>& taps) - : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(taps.size(), T()), - delayline_cursor(0) + vec<T, N> output; + size_t cursor = delayline_cursor; + KFR_LOOP_NOUNROLL + for (size_t i = 0; i < N; i++) { + delayline.ringbuf_write(cursor, input[i]); + output(i) = dotproduct(taps, delayline.slice(cursor) /*, tapcount - cursor*/) + + dotproduct(taps.slice(tapcount - cursor), delayline /*, cursor*/); } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const - { - 
const size_t tapcount = taps.size(); - const vec<T, N> input = cast<T>(this->argument_first(index, x)); - - vec<T, N> output; - size_t cursor = delayline_cursor; - KFR_LOOP_NOUNROLL - for (size_t i = 0; i < N; i++) - { - delayline.ringbuf_write(cursor, input[i]); - output(i) = dotproduct(taps, delayline.slice(cursor) /*, tapcount - cursor*/) + - dotproduct(taps.slice(tapcount - cursor), delayline /*, cursor*/); - } - delayline_cursor = cursor; - return cast<U>(output); - } - univector_dyn<T> taps; - mutable univector_dyn<T> delayline; - mutable size_t delayline_cursor; - }; + delayline_cursor = cursor; + return cast<U>(output); + } + univector_dyn<T> taps; + mutable univector_dyn<T> delayline; + mutable size_t delayline_cursor; }; } -namespace native -{ template <typename T, typename E1, size_t Tag> -KFR_INLINE internal::in_fir<>::expression_fir<T, E1> fir(E1&& e1, const univector<T, Tag>& taps) +KFR_INLINE internal::expression_fir<T, E1> fir(E1&& e1, const univector<T, Tag>& taps) { - return internal::in_fir<>::expression_fir<T, E1>(std::forward<E1>(e1), taps.ref()); + return internal::expression_fir<T, E1>(std::forward<E1>(e1), taps.ref()); } template <typename T, size_t TapCount, typename E1> -KFR_INLINE internal::in_fir<>::expression_short_fir<TapCount, T, E1> short_fir( - E1&& e1, const univector<T, TapCount>& taps) +KFR_INLINE internal::expression_short_fir<TapCount, T, E1> short_fir(E1&& e1, + const univector<T, TapCount>& taps) { static_assert(TapCount >= 1 && TapCount < 16, "Use short_fir only for small FIR filters"); - return internal::in_fir<>::expression_short_fir<TapCount, T, E1>(std::forward<E1>(e1), taps.ref()); -} + return internal::expression_short_fir<TapCount, T, E1>(std::forward<E1>(e1), taps.ref()); } } diff --git a/include/kfr/dsp/fir_design.hpp b/include/kfr/dsp/fir_design.hpp @@ -24,218 +24,124 @@ #include "fir.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored 
"-Winaccessible-base" -#endif - namespace kfr { namespace internal { -template <cpu_t cpu = cpu_t::native> -struct in_fir_design : in_sqrt<cpu>, - in_abs<cpu>, - in_log_exp<cpu>, - in_sin_cos<cpu>, - in_window<cpu>, - in_reduce<cpu> +template <typename T> +KFR_SINTRIN void fir_lowpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window, + bool normalize = true) { -private: - using in_sqrt<cpu>::sqrt; - using in_abs<cpu>::abs; - using in_log_exp<cpu>::log; - using in_log_exp<cpu>::exp; - using in_log_exp<cpu>::log_fmadd; - using in_log_exp<cpu>::exp_fmadd; - using in_log_exp<cpu>::exp10; - using typename in_sin_cos<cpu>::fn_sinc; - using in_reduce<cpu>::reduce; - using in_reduce<cpu>::dotproduct; - using in_reduce<cpu>::sum; - -public: - template <typename T> - KFR_SINTRIN void fir_lowpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window, - bool normalize = true) + const T scale = 2.0 * cutoff; + taps = bind_expression(fn_sinc(), + symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, taps.size(), true)) * + scale * window; + + if (is_odd(taps.size())) + taps[taps.size() / 2] = scale; + + if (normalize) { - const T scale = 2.0 * cutoff; - taps = bind_expression(fn_sinc(), symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, - taps.size(), true)) * - scale * window; - - if (is_odd(taps.size())) - taps[taps.size() / 2] = scale; - - if (normalize) - { - const T invsum = reciprocal(sum(taps)); - taps = taps * invsum; - } + const T invsum = reciprocal(sum(taps)); + taps = taps * invsum; } - template <typename T> - KFR_SINTRIN void fir_highpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window, - bool normalize = true) +} +template <typename T> +KFR_SINTRIN void fir_highpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window, + bool normalize = true) +{ + const T scale = 2.0 * -cutoff; + taps = bind_expression(fn_sinc(), + symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, 
taps.size(), true)) * + scale * window; + + if (is_odd(taps.size())) + taps[taps.size() / 2] = 1 - 2.0 * cutoff; + + if (normalize) { - const T scale = 2.0 * -cutoff; - taps = bind_expression(fn_sinc(), symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, - taps.size(), true)) * - scale * window; - - if (is_odd(taps.size())) - taps[taps.size() / 2] = 1 - 2.0 * cutoff; - - if (normalize) - { - const T invsum = reciprocal(sum(taps) + 1); - taps = taps * invsum; - } + const T invsum = reciprocal(sum(taps) + 1); + taps = taps * invsum; } +} + +template <typename T> +KFR_SINTRIN void fir_bandpass(univector_ref<T> taps, T frequency1, T frequency2, + const expression_pointer<T>& window, bool normalize = true) +{ + const T scale1 = 2.0 * frequency1; + const T scale2 = 2.0 * frequency2; + const T sc = c_pi<T> * T(taps.size() - 1); + const T start1 = sc * frequency1; + const T start2 = sc * frequency2; + + taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2 - + bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1) * + window; - template <typename T> - KFR_SINTRIN void fir_bandpass(univector_ref<T> taps, T frequency1, T frequency2, - const expression_pointer<T>& window, bool normalize = true) + if (is_odd(taps.size())) + taps[taps.size() / 2] = 2 * (frequency2 - frequency1); + + if (normalize) { - const T scale1 = 2.0 * frequency1; - const T scale2 = 2.0 * frequency2; - const T sc = c_pi<T> * T(taps.size() - 1); - const T start1 = sc * frequency1; - const T start2 = sc * frequency2; - - taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2 - - bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1) * - window; - - if (is_odd(taps.size())) - taps[taps.size() / 2] = 2 * (frequency2 - frequency1); - - if (normalize) - { - const T invsum = reciprocal(sum(taps) + 1); - taps = taps * invsum; - } + const T invsum = 
reciprocal(sum(taps) + 1); + taps = taps * invsum; } +} + +template <typename T> +KFR_SINTRIN void fir_bandstop(univector_ref<T> taps, T frequency1, T frequency2, + const expression_pointer<T>& window, bool normalize = true) +{ + const T scale1 = 2.0 * frequency1; + const T scale2 = 2.0 * frequency2; + const T sc = c_pi<T> * T(taps.size() - 1); + const T start1 = sc * frequency1; + const T start2 = sc * frequency2; + + taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1 - + bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2) * + window; + + if (is_odd(taps.size())) + taps[taps.size() / 2] = 1 - 2 * (frequency2 - frequency1); - template <typename T> - KFR_SINTRIN void fir_bandstop(univector_ref<T> taps, T frequency1, T frequency2, - const expression_pointer<T>& window, bool normalize = true) + if (normalize) { - const T scale1 = 2.0 * frequency1; - const T scale2 = 2.0 * frequency2; - const T sc = c_pi<T> * T(taps.size() - 1); - const T start1 = sc * frequency1; - const T start2 = sc * frequency2; - - taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1 - - bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2) * - window; - - if (is_odd(taps.size())) - taps[taps.size() / 2] = 1 - 2 * (frequency2 - frequency1); - - if (normalize) - { - const T invsum = reciprocal(sum(taps)); - taps = taps * invsum; - } + const T invsum = reciprocal(sum(taps)); + taps = taps * invsum; } +} - template <size_t tapcount, typename T, typename E1> - struct expression_short_fir : expression<E1> - { - static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two"); - template <cpu_t newcpu> - using retarget_this = - typename in_fir<newcpu>::template expression_short_fir<tapcount, T, retarget<E1, newcpu>>; - - expression_short_fir(E1&& e1, const array_ref<T>& taps) - : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0) - { 
- } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const - { - vec<T, N> in = cast<T>(this->argument_first(index, x)); - - vec<T, N> out = in * taps[0]; - cfor(csize<1>, csize<tapcount>, - [&](auto I) { out = out + concat_and_slice<tapcount - 1 - I, N>(delayline, in) * taps[I]; }); - delayline = concat_and_slice<N, tapcount - 1>(delayline, in); - - return cast<U>(out); - } - vec<T, tapcount> taps; - mutable vec<T, tapcount - 1> delayline; - }; - - template <typename T, typename E1> - struct expression_fir : expression<E1> - { - template <cpu_t newcpu> - using retarget_this = typename in_fir<newcpu>::template expression_fir<T, retarget<E1, newcpu>>; - - expression_fir(E1&& e1, const array_ref<const T>& taps) - : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(taps.size(), T()), - delayline_cursor(0) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const - { - const size_t tapcount = taps.size(); - const vec<T, N> input = cast<T>(this->argument_first(index, x)); - - vec<T, N> output; - size_t cursor = delayline_cursor; - KFR_LOOP_NOUNROLL - for (size_t i = 0; i < N; i++) - { - delayline.ringbuf_write(cursor, input[i]); - output(i) = dotproduct(taps, delayline.slice(cursor) /*, tapcount - cursor*/) + - dotproduct(taps.slice(tapcount - cursor), delayline /*, cursor*/); - } - delayline_cursor = cursor; - return cast<U>(output); - } - univector_dyn<T> taps; - mutable univector_dyn<T> delayline; - mutable size_t delayline_cursor; - }; - KFR_SPEC_FN(in_fir_design, fir_lowpass) - KFR_SPEC_FN(in_fir_design, fir_highpass) - KFR_SPEC_FN(in_fir_design, fir_bandpass) - KFR_SPEC_FN(in_fir_design, fir_bandstop) -}; +KFR_FN(fir_lowpass) +KFR_FN(fir_highpass) +KFR_FN(fir_bandpass) +KFR_FN(fir_bandstop) } -namespace native -{ template <typename T, size_t Tag> KFR_INLINE void fir_lowpass(univector<T, Tag>& taps, identity<T> cutoff, const 
expression_pointer<T>& window, bool normalize = true) { - return internal::in_fir_design<>::fir_lowpass(taps.slice(), cutoff, window, normalize); + return internal::fir_lowpass(taps.slice(), cutoff, window, normalize); } template <typename T, size_t Tag> KFR_INLINE void fir_highpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window, bool normalize = true) { - return internal::in_fir_design<>::fir_highpass(taps.slice(), cutoff, window, normalize); + return internal::fir_highpass(taps.slice(), cutoff, window, normalize); } template <typename T, size_t Tag> KFR_INLINE void fir_bandpass(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2, const expression_pointer<T>& window, bool normalize = true) { - return internal::in_fir_design<>::fir_bandpass(taps.slice(), frequency1, frequency2, window, normalize); + return internal::fir_bandpass(taps.slice(), frequency1, frequency2, window, normalize); } template <typename T, size_t Tag> KFR_INLINE void fir_bandstop(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2, const expression_pointer<T>& window, bool normalize = true) { - return internal::in_fir_design<>::fir_bandstop(taps.slice(), frequency1, frequency2, window, normalize); -} + return internal::fir_bandstop(taps.slice(), frequency1, frequency2, window, normalize); } } - -#pragma clang diagnostic pop diff --git a/include/kfr/dsp/fracdelay.hpp b/include/kfr/dsp/fracdelay.hpp @@ -27,18 +27,14 @@ namespace kfr { -namespace native -{ - template <typename T, typename E1> -KFR_INLINE internal::in_fir<>::expression_short_fir<2, T, E1> fracdelay(E1&& e1, T delay) +KFR_INLINE internal::expression_short_fir<2, T, E1> fracdelay(E1&& e1, T delay) { if (delay < 0) delay = 0; if (delay > 1) delay = fract(delay); univector<T, 2> taps({ 1 - delay, delay }); - return internal::in_fir<>::expression_short_fir<2, T, E1>(std::forward<E1>(e1), taps.ref()); -} + return internal::expression_short_fir<2, T, 
E1>(std::forward<E1>(e1), taps.ref()); } } diff --git a/include/kfr/dsp/oscillators.hpp b/include/kfr/dsp/oscillators.hpp @@ -23,28 +23,20 @@ #pragma once #include "../base/sin_cos.hpp" -#include "../base/vec.hpp" #include "../expressions/basic.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { template <typename T> auto jaehne(T magn, size_t size) { - using namespace native; return typed<T>(magn * sin(c_pi<T, 1, 2> * sqr(linspace(T(0), T(size), size, false)) / size), size); } template <typename T> auto swept(T magn, size_t size) { - using namespace native; return typed<T>( magn * sin(c_pi<T, 1, 4> * sqr(sqr(linspace(T(0), T(size), size, false)) / sqr(T(size))) * T(size)), size); @@ -52,277 +44,235 @@ auto swept(T magn, size_t size) namespace internal { -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_oscillators : in_sin_cos<cc>, in_select<cc>, in_round<cc>, in_abs<cc> +template <typename T> +KFR_SINTRIN T rawsine(T x) { -private: - using in_sin_cos<cc>::fastsin; - using in_sin_cos<cc>::sin; - using in_select<cc>::select; - using in_round<cc>::fract; - using in_abs<cc>::abs; - -public: - template <typename T> - KFR_SINTRIN T rawsine(T x) - { - return fastsin(x * c_pi<T, 2>); - } - template <typename T> - KFR_SINTRIN T sinenorm(T x) - { - return rawsine(fract(x)); - } - template <typename T> - KFR_SINTRIN T sine(T x) - { - return sinenorm(c_recip_pi<T, 1, 2> * x); - } + return fastsin(x * c_pi<T, 2>); +} +template <typename T> +KFR_SINTRIN T sinenorm(T x) +{ + return rawsine(fract(x)); +} +template <typename T> +KFR_SINTRIN T sine(T x) +{ + return sinenorm(c_recip_pi<T, 1, 2> * x); +} - template <typename T> - KFR_SINTRIN T rawsquare(T x) - { - return select(x < T(0.5), T(1), -T(1)); - } - template <typename T> - KFR_SINTRIN T squarenorm(T x) - { - return rawsquare(fract(x)); - } - template <typename T> - KFR_SINTRIN T square(T x) - { - return 
squarenorm(c_recip_pi<T, 1, 2> * x); - } +template <typename T> +KFR_SINTRIN T rawsquare(T x) +{ + return select(x < T(0.5), T(1), -T(1)); +} +template <typename T> +KFR_SINTRIN T squarenorm(T x) +{ + return rawsquare(fract(x)); +} +template <typename T> +KFR_SINTRIN T square(T x) +{ + return squarenorm(c_recip_pi<T, 1, 2> * x); +} - template <typename T> - KFR_SINTRIN T rawsawtooth(T x) - { - return T(1) - 2 * x; - } - template <typename T> - KFR_SINTRIN T sawtoothnorm(T x) - { - return rawsawtooth(fract(x)); - } - template <typename T> - KFR_SINTRIN T sawtooth(T x) - { - return sawtoothnorm(c_recip_pi<T, 1, 2> * x); - } +template <typename T> +KFR_SINTRIN T rawsawtooth(T x) +{ + return T(1) - 2 * x; +} +template <typename T> +KFR_SINTRIN T sawtoothnorm(T x) +{ + return rawsawtooth(fract(x)); +} +template <typename T> +KFR_SINTRIN T sawtooth(T x) +{ + return sawtoothnorm(c_recip_pi<T, 1, 2> * x); +} - template <typename T> - KFR_SINTRIN T isawtoothnorm(T x) - { - return T(-1) + 2 * fract(x + 0.5); - } - template <typename T> - KFR_SINTRIN T isawtooth(T x) - { - return isawtoothnorm(c_recip_pi<T, 1, 2> * x); - } +template <typename T> +KFR_SINTRIN T isawtoothnorm(T x) +{ + return T(-1) + 2 * fract(x + 0.5); +} +template <typename T> +KFR_SINTRIN T isawtooth(T x) +{ + return isawtoothnorm(c_recip_pi<T, 1, 2> * x); +} - template <typename T> - KFR_SINTRIN T rawtriangle(T x) - { - return 1 - abs(4 * x - 2); - } - template <typename T> - KFR_SINTRIN T trianglenorm(T x) - { - return rawtriangle(fract(x + 0.25)); - } - template <typename T> - KFR_SINTRIN T triangle(T x) - { - return trianglenorm(c_recip_pi<T, 1, 2> * x); - } +template <typename T> +KFR_SINTRIN T rawtriangle(T x) +{ + return 1 - abs(4 * x - 2); +} +template <typename T> +KFR_SINTRIN T trianglenorm(T x) +{ + return rawtriangle(fract(x + 0.25)); +} +template <typename T> +KFR_SINTRIN T triangle(T x) +{ + return trianglenorm(c_recip_pi<T, 1, 2> * x); +} - KFR_SPEC_FN(in_oscillators, rawsine) - 
KFR_SPEC_FN(in_oscillators, sine) - KFR_SPEC_FN(in_oscillators, sinenorm) - KFR_SPEC_FN(in_oscillators, rawsquare) - KFR_SPEC_FN(in_oscillators, square) - KFR_SPEC_FN(in_oscillators, squarenorm) - KFR_SPEC_FN(in_oscillators, rawtriangle) - KFR_SPEC_FN(in_oscillators, triangle) - KFR_SPEC_FN(in_oscillators, trianglenorm) - KFR_SPEC_FN(in_oscillators, rawsawtooth) - KFR_SPEC_FN(in_oscillators, sawtooth) - KFR_SPEC_FN(in_oscillators, sawtoothnorm) - KFR_SPEC_FN(in_oscillators, isawtooth) - KFR_SPEC_FN(in_oscillators, isawtoothnorm) -}; +KFR_FN(rawsine) +KFR_FN(sine) +KFR_FN(sinenorm) +KFR_FN(rawsquare) +KFR_FN(square) +KFR_FN(squarenorm) +KFR_FN(rawtriangle) +KFR_FN(triangle) +KFR_FN(trianglenorm) +KFR_FN(rawsawtooth) +KFR_FN(sawtooth) +KFR_FN(sawtoothnorm) +KFR_FN(isawtooth) +KFR_FN(isawtoothnorm) } -using fn_rawsine = internal::in_oscillators<>::fn_rawsine; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> rawsine(const T1& x) +KFR_INTRIN T1 rawsine(const T1& x) { - return internal::in_oscillators<>::rawsine(x); + return internal::rawsine(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_rawsine, E1> rawsine(E1&& x) +KFR_INTRIN expr_func<internal::fn_rawsine, E1> rawsine(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_sine = internal::in_oscillators<>::fn_sine; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sine(const T1& x) +KFR_INTRIN T1 sine(const T1& x) { - return internal::in_oscillators<>::sine(x); + return internal::sine(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sine, E1> sine(E1&& x) +KFR_INTRIN expr_func<internal::fn_sine, E1> sine(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_sinenorm = internal::in_oscillators<>::fn_sinenorm; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sinenorm(const T1& x) +KFR_INTRIN T1 
sinenorm(const T1& x) { - return internal::in_oscillators<>::sinenorm(x); + return internal::sinenorm(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sinenorm, E1> sinenorm(E1&& x) +KFR_INTRIN expr_func<internal::fn_sinenorm, E1> sinenorm(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_rawsquare = internal::in_oscillators<>::fn_rawsquare; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> rawsquare(const T1& x) +KFR_INTRIN T1 rawsquare(const T1& x) { - return internal::in_oscillators<>::rawsquare(x); + return internal::rawsquare(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_rawsquare, E1> rawsquare(E1&& x) +KFR_INTRIN expr_func<internal::fn_rawsquare, E1> rawsquare(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_square = internal::in_oscillators<>::fn_square; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> square(const T1& x) +KFR_INTRIN T1 square(const T1& x) { - return internal::in_oscillators<>::square(x); + return internal::square(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_square, E1> square(E1&& x) +KFR_INTRIN expr_func<internal::fn_square, E1> square(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_squarenorm = internal::in_oscillators<>::fn_squarenorm; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> squarenorm(const T1& x) +KFR_INTRIN T1 squarenorm(const T1& x) { - return internal::in_oscillators<>::squarenorm(x); + return internal::squarenorm(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_squarenorm, E1> squarenorm(E1&& x) +KFR_INTRIN expr_func<internal::fn_squarenorm, E1> squarenorm(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_rawtriangle = internal::in_oscillators<>::fn_rawtriangle; 
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> rawtriangle(const T1& x) +KFR_INTRIN T1 rawtriangle(const T1& x) { - return internal::in_oscillators<>::rawtriangle(x); + return internal::rawtriangle(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_rawtriangle, E1> rawtriangle(E1&& x) +KFR_INTRIN expr_func<internal::fn_rawtriangle, E1> rawtriangle(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_triangle = internal::in_oscillators<>::fn_triangle; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> triangle(const T1& x) +KFR_INTRIN T1 triangle(const T1& x) { - return internal::in_oscillators<>::triangle(x); + return internal::triangle(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_triangle, E1> triangle(E1&& x) +KFR_INTRIN expr_func<internal::fn_triangle, E1> triangle(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_trianglenorm = internal::in_oscillators<>::fn_trianglenorm; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> trianglenorm(const T1& x) +KFR_INTRIN T1 trianglenorm(const T1& x) { - return internal::in_oscillators<>::trianglenorm(x); + return internal::trianglenorm(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_trianglenorm, E1> trianglenorm(E1&& x) +KFR_INTRIN expr_func<internal::fn_trianglenorm, E1> trianglenorm(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_rawsawtooth = internal::in_oscillators<>::fn_rawsawtooth; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> rawsawtooth(const T1& x) +KFR_INTRIN T1 rawsawtooth(const T1& x) { - return internal::in_oscillators<>::rawsawtooth(x); + return internal::rawsawtooth(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_rawsawtooth, E1> 
rawsawtooth(E1&& x) +KFR_INTRIN expr_func<internal::fn_rawsawtooth, E1> rawsawtooth(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_sawtooth = internal::in_oscillators<>::fn_sawtooth; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sawtooth(const T1& x) +KFR_INTRIN T1 sawtooth(const T1& x) { - return internal::in_oscillators<>::sawtooth(x); + return internal::sawtooth(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sawtooth, E1> sawtooth(E1&& x) +KFR_INTRIN expr_func<internal::fn_sawtooth, E1> sawtooth(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_sawtoothnorm = internal::in_oscillators<>::fn_sawtoothnorm; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> sawtoothnorm(const T1& x) +KFR_INTRIN T1 sawtoothnorm(const T1& x) { - return internal::in_oscillators<>::sawtoothnorm(x); + return internal::sawtoothnorm(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_sawtoothnorm, E1> sawtoothnorm(E1&& x) +KFR_INTRIN expr_func<internal::fn_sawtoothnorm, E1> sawtoothnorm(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_isawtooth = internal::in_oscillators<>::fn_isawtooth; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> isawtooth(const T1& x) +KFR_INTRIN T1 isawtooth(const T1& x) { - return internal::in_oscillators<>::isawtooth(x); + return internal::isawtooth(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_isawtooth, E1> isawtooth(E1&& x) +KFR_INTRIN expr_func<internal::fn_isawtooth, E1> isawtooth(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_isawtoothnorm = internal::in_oscillators<>::fn_isawtoothnorm; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> isawtoothnorm(const T1& x) +KFR_INTRIN T1 isawtoothnorm(const T1& x) { - return 
internal::in_oscillators<>::isawtoothnorm(x); + return internal::isawtoothnorm(x); } - template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_isawtoothnorm, E1> isawtoothnorm(E1&& x) +KFR_INTRIN expr_func<internal::fn_isawtoothnorm, E1> isawtoothnorm(E1&& x) { return { {}, std::forward<E1>(x) }; } } - -#pragma clang diagnostic pop diff --git a/include/kfr/dsp/resample.hpp b/include/kfr/dsp/resample.hpp @@ -28,11 +28,6 @@ #include "../expressions/reduce.hpp" #include "window.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { namespace resample_quality @@ -45,200 +40,174 @@ constexpr csize_t<10> high{}; namespace internal { -template <cpu_t cc = cpu_t::native> -struct in_resampling : in_sqrt<cc>, in_abs<cc>, in_log_exp<cc>, in_sin_cos<cc>, in_window<cc>, in_reduce<cc> +template <typename T1, typename T2> +KFR_SINTRIN T1 resample_blackman(T1 n, T2 a) { -private: - using in_sqrt<cc>::sqrt; - using in_abs<cc>::abs; - using in_log_exp<cc>::log; - using in_log_exp<cc>::exp; - using in_log_exp<cc>::log_fmadd; - using in_log_exp<cc>::exp_fmadd; - using in_log_exp<cc>::exp10; - using in_sin_cos<cc>::cos; - using in_sin_cos<cc>::sinc; - using in_reduce<cc>::dotproduct; - using in_reduce<cc>::sum; - -public: - template <typename T1, typename T2> - static inline T1 blackman(T1 n, T2 a) - { - const T1 a0 = (1 - a) * 0.5; - const T1 a1 = 0.5; - const T1 a2 = a * 0.5; - n = n * c_pi<T1, 2>; - return a0 - a1 * cos(n) + a2 * cos(2 * n); - } + const T1 a0 = (1 - a) * 0.5; + const T1 a1 = 0.5; + const T1 a2 = a * 0.5; + n = n * c_pi<T1, 2>; + return a0 - a1 * cos(n) + a2 * cos(2 * n); +} - template <typename T, size_t quality> - struct resampler - { - template <cpu_t newcpu> - using retarget_this = typename in_resampling<newcpu>::template resampler<T, quality>; +template <typename T, size_t quality> +struct resampler +{ + using itype = 
i64; - using itype = i64; + constexpr static itype depth = static_cast<itype>(1 << (quality + 1)); - constexpr static itype depth = static_cast<itype>(1 << (quality + 1)); + resampler(itype interpolation_factor, itype decimation_factor, T scale = T(1), T cutoff = 0.49) + : input_position(0), output_position(0) + { + const i64 gcf = gcd(interpolation_factor, decimation_factor); + interpolation_factor /= gcf; + decimation_factor /= gcf; - resampler(itype interpolation_factor, itype decimation_factor, T scale = T(1), T cutoff = 0.49) - : input_position(0), output_position(0) - { - const i64 gcf = gcd(interpolation_factor, decimation_factor); - interpolation_factor /= gcf; - decimation_factor /= gcf; + taps = depth * interpolation_factor; + order = size_t(depth * interpolation_factor - 1); - taps = depth * interpolation_factor; - order = size_t(depth * interpolation_factor - 1); + this->interpolation_factor = interpolation_factor; + this->decimation_factor = decimation_factor; - this->interpolation_factor = interpolation_factor; - this->decimation_factor = decimation_factor; + const itype halftaps = taps / 2; + filter = univector<T>(size_t(taps), T()); + delay = univector<T>(size_t(depth), T()); - const itype halftaps = taps / 2; - filter = univector<T>(size_t(taps), T()); - delay = univector<T>(size_t(depth), T()); + cutoff = cutoff / std::max(decimation_factor, interpolation_factor); - cutoff = cutoff / std::max(decimation_factor, interpolation_factor); + for (itype j = 0, jj = 0; j < taps; j++) + { + filter[size_t(j)] = scale * 2 * interpolation_factor * cutoff * + sinc((jj - halftaps) * cutoff * c_pi<T, 2>) * + resample_blackman(T(jj) / T(taps - 1), T(0.16)); + jj += size_t(interpolation_factor); + if (jj >= taps) + jj = jj - taps + 1; + } - for (itype j = 0, jj = 0; j < taps; j++) - { - filter[size_t(j)] = scale * 2 * interpolation_factor * cutoff * - sinc((jj - halftaps) * cutoff * c_pi<T, 2>) * - blackman(T(jj) / T(taps - 1), T(0.16)); - jj += 
size_t(interpolation_factor); - if (jj >= taps) - jj = jj - taps + 1; - } + const T s = reciprocal(sum(filter)) * interpolation_factor; + filter = filter * s; + } + KFR_INLINE size_t operator()(T* dest, size_t zerosize) + { + size_t outputsize = 0; + const itype srcsize = itype(zerosize); - const T s = reciprocal(sum(filter)) * interpolation_factor; - filter = filter * s; - } - KFR_INLINE size_t operator()(T* dest, size_t zerosize) + for (size_t i = 0;; i++) { - size_t outputsize = 0; - const itype srcsize = itype(zerosize); - - for (size_t i = 0;; i++) + const itype ii = itype(i) + output_position; + const itype workindex = ii * (decimation_factor); + const itype workindex_rem = workindex % (interpolation_factor); + const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0; + itype srcindex = workindex / (interpolation_factor); + srcindex = workindex_rem ? srcindex + 1 : srcindex; + const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth)); + srcindex = srcindex - (depth - 1); + + if (srcindex + depth >= input_position + srcsize) + break; + outputsize++; + + if (dest) { - const itype ii = itype(i) + output_position; - const itype workindex = ii * (decimation_factor); - const itype workindex_rem = workindex % (interpolation_factor); - const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0; - itype srcindex = workindex / (interpolation_factor); - srcindex = workindex_rem ? 
srcindex + 1 : srcindex; - const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth)); - srcindex = srcindex - (depth - 1); - - if (srcindex + depth >= input_position + srcsize) - break; - outputsize++; - - if (dest) + if (srcindex >= input_position) { - if (srcindex >= input_position) - { - dest[i] = T(0); - } - else - { - const itype prev_count = input_position - srcindex; - dest[i] = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr); - } + dest[i] = T(0); + } + else + { + const itype prev_count = input_position - srcindex; + dest[i] = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr); } } - if (srcsize >= depth) - { - delay = zeros(); - } - else - { - delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize)); - delay.slice(size_t(depth - srcsize)) = zeros(); - } - - input_position += srcsize; - output_position += outputsize; - return outputsize; } - KFR_INLINE size_t operator()(T* dest, univector_ref<const T> src) + if (srcsize >= depth) { - size_t outputsize = 0; - const itype srcsize = itype(src.size()); + delay = zeros(); + } + else + { + delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize)); + delay.slice(size_t(depth - srcsize)) = zeros(); + } + + input_position += srcsize; + output_position += outputsize; + return outputsize; + } + KFR_INLINE size_t operator()(T* dest, univector_ref<const T> src) + { + size_t outputsize = 0; + const itype srcsize = itype(src.size()); - for (size_t i = 0;; i++) + for (size_t i = 0;; i++) + { + const itype ii = itype(i) + output_position; + const itype workindex = ii * (decimation_factor); + const itype workindex_rem = workindex % (interpolation_factor); + const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0; + itype srcindex = workindex / (interpolation_factor); + srcindex = workindex_rem ? 
srcindex + 1 : srcindex; + const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth)); + srcindex = srcindex - (depth - 1); + + if (srcindex + depth >= input_position + srcsize) + break; + outputsize++; + + if (dest) { - const itype ii = itype(i) + output_position; - const itype workindex = ii * (decimation_factor); - const itype workindex_rem = workindex % (interpolation_factor); - const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0; - itype srcindex = workindex / (interpolation_factor); - srcindex = workindex_rem ? srcindex + 1 : srcindex; - const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth)); - srcindex = srcindex - (depth - 1); - - if (srcindex + depth >= input_position + srcsize) - break; - outputsize++; - - if (dest) + if (srcindex >= input_position) { - if (srcindex >= input_position) - { - dest[i] = dotproduct(src.slice(size_t(srcindex - input_position), size_t(depth)), - tap_ptr /*, depth*/); - } - else - { - const itype prev_count = input_position - srcindex; - dest[i] = - dotproduct(delay.slice(size_t(depth - prev_count)), - tap_ptr /*, size_t(prev_count)*/) + - dotproduct(src, tap_ptr.slice( - size_t(prev_count), - size_t(depth - prev_count)) /*, size_t(depth - prev_count)*/); - } + dest[i] = dotproduct(src.slice(size_t(srcindex - input_position), size_t(depth)), + tap_ptr /*, depth*/); + } + else + { + const itype prev_count = input_position - srcindex; + dest[i] = + dotproduct(delay.slice(size_t(depth - prev_count)), + tap_ptr /*, size_t(prev_count)*/) + + dotproduct( + src, tap_ptr.slice(size_t(prev_count), + size_t(depth - prev_count)) /*, size_t(depth - prev_count)*/); } } - if (srcsize >= depth) - { - delay = src.slice(size_t(srcsize - depth)); - } - else - { - delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize)); - delay.slice(size_t(depth - srcsize)) = src; - } - - input_position += srcsize; - output_position += outputsize; - return outputsize; } - 
itype taps; - size_t order; - itype interpolation_factor; - itype decimation_factor; - univector<T> filter; - univector<T> delay; - itype input_position; - itype output_position; - }; + if (srcsize >= depth) + { + delay = src.slice(size_t(srcsize - depth)); + } + else + { + delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize)); + delay.slice(size_t(depth - srcsize)) = src; + } + + input_position += srcsize; + output_position += outputsize; + return outputsize; + } + itype taps; + size_t order; + itype interpolation_factor; + itype decimation_factor; + univector<T> filter; + univector<T> delay; + itype input_position; + itype output_position; }; } -namespace native -{ template <typename T, size_t quality> -inline internal::in_resampling<>::resampler<T, quality> resampler(csize_t<quality>, +inline internal::resampler<T, quality> resampler(csize_t<quality>, size_t interpolation_factor, size_t decimation_factor, T scale = T(1), T cutoff = 0.49) { - using itype = typename internal::in_resampling<>::resampler<T, quality>::itype; - return internal::in_resampling<>::resampler<T, quality>(itype(interpolation_factor), + using itype = typename internal::resampler<T, quality>::itype; + return internal::resampler<T, quality>(itype(interpolation_factor), itype(decimation_factor), scale, cutoff); } } -} - -#pragma clang diagnostic pop diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp @@ -31,11 +31,6 @@ #include "../base/vec.hpp" #include "../expressions/basic.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { @@ -43,177 +38,159 @@ using sample_rate_t = double; namespace internal { -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_dsp_units : in_log_exp<cc>, in_select<cc>, in_round<cc>, in_abs<cc> -{ -private: - using in_log_exp<cc>::log; - using in_log_exp<cc>::exp; - using in_log_exp<cc>::log10; - using in_log_exp<cc>::exp10; 
- using in_log_exp<cc>::exp_fmadd; - using in_log_exp<cc>::log_fmadd; - using in_select<cc>::select; - using in_round<cc>::fract; - using in_abs<cc>::abs; - -public: - template <typename T, typename TF = ftype<T>> - KFR_SINTRIN TF amp_to_dB(T amp) - { - return log(cast<subtype<TF>>(amp)) * subtype<TF>(8.6858896380650365530225783783322); - // return T( 20.0 ) * log10( level ); - } - - template <typename T, typename TF = ftype<T>> - KFR_SINTRIN TF dB_to_amp(T dB) - { - return exp(dB * subtype<TF>(0.11512925464970228420089957273422)); - // return exp10( dB / 20 ); - } - - template <typename T, typename TF = ftype<T>> - KFR_SINTRIN TF amp_to_dB(T amp, T offset) - { - return log_fmadd(amp, subtype<TF>(8.6858896380650365530225783783322), offset); - // return T( 20.0 ) * log10( level ); - } - - template <typename T, typename TF = ftype<T>> - KFR_SINTRIN TF dB_to_amp(T dB, T offset) - { - auto offs = -subtype<TF>(0.11512925464970228420089957273422) * offset; - return exp_fmadd(dB, subtype<TF>(0.11512925464970228420089957273422), offs); - // return exp10( dB / 20 ); - } - - template <typename T> - KFR_SINTRIN T power_to_dB(T x) - { - return log(x) * (10 * c_recip_log_10<T>); - } - - template <typename T> - KFR_SINTRIN T dB_to_power(T x) - { - if (x == -c_infinity<T>) - return 0.0; - else - return exp(x * (c_log_10<T> / 10.0)); - } - - template <typename T, typename TF = ftype<T>> - KFR_SINTRIN TF note_to_hertz(T note) - { - const subtype<TF> offset = 2.1011784386926213177653145771814; - - return exp_fmadd(note, subtype<TF>(0.05776226504666210911810267678818), offset); - } - - template <typename T, typename TF = ftype<T>> - KFR_SINTRIN TF hertz_to_note(T hertz) - { - const subtype<TF> offset = -36.376316562295915248836189714583; - - return log_fmadd(hertz, subtype<TF>(17.312340490667560888319096172023), offset); - } - - template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>> - KFR_SINTRIN Tc note_to_hertz(T1 note, T2 tunenote, T3 
tunehertz) - { - const Tc offset = log(tunehertz) - tunenote * subtype<Tc>(0.05776226504666210911810267678818); - - return exp_fmadd(note, subtype<Tc>(0.05776226504666210911810267678818), offset); - } - - template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>> - KFR_SINTRIN Tc hertz_to_note(T1 hertz, T2 tunenote, T3 tunehertz) - { - const Tc offset = tunenote - log(tunehertz) * subtype<Tc>(17.312340490667560888319096172023); - - return log_fmadd(hertz, subtype<Tc>(17.312340490667560888319096172023), offset); - } - - KFR_SPEC_FN(in_dsp_units, note_to_hertz) - KFR_SPEC_FN(in_dsp_units, hertz_to_note) - KFR_SPEC_FN(in_dsp_units, amp_to_dB) - KFR_SPEC_FN(in_dsp_units, dB_to_amp) - KFR_SPEC_FN(in_dsp_units, power_to_dB) - KFR_SPEC_FN(in_dsp_units, dB_to_power) -}; -} - -using fn_note_to_hertz = internal::in_dsp_units<>::fn_note_to_hertz; +template <typename T, typename TF = ftype<T>> +KFR_SINTRIN TF amp_to_dB(T amp) +{ + return log(cast<subtype<TF>>(amp)) * subtype<TF>(8.6858896380650365530225783783322); + // return T( 20.0 ) * log10( level ); +} + +template <typename T, typename TF = ftype<T>> +KFR_SINTRIN TF dB_to_amp(T dB) +{ + return exp(dB * subtype<TF>(0.11512925464970228420089957273422)); + // return exp10( dB / 20 ); +} + +template <typename T, typename TF = ftype<T>> +KFR_SINTRIN TF amp_to_dB2(T amp, T offset) +{ + return log_fmadd(amp, subtype<TF>(8.6858896380650365530225783783322), offset); + // return T( 20.0 ) * log10( level ); +} + +template <typename T, typename TF = ftype<T>> +KFR_SINTRIN TF dB_to_amp(T dB, T offset) +{ + auto offs = -subtype<TF>(0.11512925464970228420089957273422) * offset; + return exp_fmadd(dB, subtype<TF>(0.11512925464970228420089957273422), offs); + // return exp10( dB / 20 ); +} + +template <typename T> +KFR_SINTRIN T power_to_dB(T x) +{ + return log(x) * (10 * c_recip_log_10<T>); +} + +template <typename T> +KFR_SINTRIN T dB_to_power(T x) +{ + if (x == -c_infinity<T>) + return 0.0; + else + 
return exp(x * (c_log_10<T> / 10.0)); +} + +template <typename T, typename TF = ftype<T>> +KFR_SINTRIN TF note_to_hertz(T note) +{ + const subtype<TF> offset = 2.1011784386926213177653145771814; + + return exp_fmadd(note, subtype<TF>(0.05776226504666210911810267678818), offset); +} + +template <typename T, typename TF = ftype<T>> +KFR_SINTRIN TF hertz_to_note(T hertz) +{ + const subtype<TF> offset = -36.376316562295915248836189714583; + + return log_fmadd(hertz, subtype<TF>(17.312340490667560888319096172023), offset); +} + +template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>> +KFR_SINTRIN Tc note_to_hertz(T1 note, T2 tunenote, T3 tunehertz) +{ + const Tc offset = log(tunehertz) - tunenote * subtype<Tc>(0.05776226504666210911810267678818); + + return exp_fmadd(note, subtype<Tc>(0.05776226504666210911810267678818), offset); +} + +template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>> +KFR_SINTRIN Tc hertz_to_note(T1 hertz, T2 tunenote, T3 tunehertz) +{ + const Tc offset = tunenote - log(tunehertz) * subtype<Tc>(17.312340490667560888319096172023); + + return log_fmadd(hertz, subtype<Tc>(17.312340490667560888319096172023), offset); +} + +KFR_I_FN(note_to_hertz) +KFR_I_FN(hertz_to_note) +KFR_I_FN(amp_to_dB) +KFR_I_FN(dB_to_amp) +KFR_I_FN(power_to_dB) +KFR_I_FN(dB_to_power) +} + template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> note_to_hertz(const T1& x) +KFR_INTRIN T1 note_to_hertz(const T1& x) { - return internal::in_dsp_units<>::note_to_hertz(x); + return internal::note_to_hertz(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_note_to_hertz, E1> note_to_hertz(E1&& x) +KFR_INTRIN expr_func<internal::fn_note_to_hertz, E1> note_to_hertz(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_hertz_to_note = internal::in_dsp_units<>::fn_hertz_to_note; + template <typename T1, 
KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> hertz_to_note(const T1& x) +KFR_INTRIN T1 hertz_to_note(const T1& x) { - return internal::in_dsp_units<>::hertz_to_note(x); + return internal::hertz_to_note(x); } + template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_hertz_to_note, E1> hertz_to_note(E1&& x) +KFR_INTRIN expr_func<internal::fn_hertz_to_note, E1> hertz_to_note(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_amp_to_dB = internal::in_dsp_units<>::fn_amp_to_dB; + template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> amp_to_dB(const T1& x) +KFR_INTRIN T1 amp_to_dB(const T1& x) { - return internal::in_dsp_units<>::amp_to_dB(x); + return internal::amp_to_dB(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_amp_to_dB, E1> amp_to_dB(E1&& x) +KFR_INTRIN expr_func<internal::fn_amp_to_dB, E1> amp_to_dB(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_dB_to_amp = internal::in_dsp_units<>::fn_dB_to_amp; + template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> dB_to_amp(const T1& x) +KFR_INTRIN T1 dB_to_amp(const T1& x) { - return internal::in_dsp_units<>::dB_to_amp(x); + return internal::dB_to_amp(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_dB_to_amp, E1> dB_to_amp(E1&& x) +KFR_INTRIN expr_func<internal::fn_dB_to_amp, E1> dB_to_amp(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_power_to_dB = internal::in_dsp_units<>::fn_power_to_dB; + template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> power_to_dB(const T1& x) +KFR_INTRIN T1 power_to_dB(const T1& x) { - return internal::in_dsp_units<>::power_to_dB(x); + return internal::power_to_dB(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_power_to_dB, E1> power_to_dB(E1&& x) +KFR_INTRIN 
expr_func<internal::fn_power_to_dB, E1> power_to_dB(E1&& x) { return { {}, std::forward<E1>(x) }; } -using fn_dB_to_power = internal::in_dsp_units<>::fn_dB_to_power; + template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> -KFR_INTRIN ftype<T1> dB_to_power(const T1& x) +KFR_INTRIN T1 dB_to_power(const T1& x) { - return internal::in_dsp_units<>::dB_to_power(x); + return internal::dB_to_power(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INTRIN expr_func<fn_dB_to_power, E1> dB_to_power(E1&& x) +KFR_INTRIN expr_func<internal::fn_dB_to_power, E1> dB_to_power(E1&& x) { return { {}, std::forward<E1>(x) }; } } - -#pragma clang diagnostic pop diff --git a/include/kfr/dsp/weighting.hpp b/include/kfr/dsp/weighting.hpp @@ -31,92 +31,83 @@ namespace kfr namespace internal { -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_weight : in_sqrt<cc>, in_dsp_units<cc> +template <typename T> +KFR_SINTRIN T weight_a_unnorm(T f) { -private: - using in_dsp_units<cc>::amp_to_dB; - -public: - template <typename T> - KFR_SINTRIN T weight_a_unnorm(T f) - { - const T f2 = pow2(f); - const T nom = pow2(12200) * pow4(f); - const T den = - (f2 + pow2(20.6)) * (sqrt((f2 + pow2(107.7)) * (f2 + pow2(737.9)))) * (f2 + pow2(12200)); - return nom / den; - } + const T f2 = pow2(f); + const T nom = pow2(12200) * pow4(f); + const T den = (f2 + pow2(20.6)) * (sqrt((f2 + pow2(107.7)) * (f2 + pow2(737.9)))) * (f2 + pow2(12200)); + return nom / den; +} - template <typename T> - constexpr static T weight_a_gain = reciprocal(weight_a_unnorm(T(1000.0))); +template <typename T> +constexpr static T weight_a_gain = reciprocal(weight_a_unnorm(T(1000.0))); - template <typename T> - KFR_SINTRIN T aweighting(T f) - { - return weight_a_unnorm(f) * weight_a_gain<subtype<T>>; - } +template <typename T> +KFR_SINTRIN T aweighting(T f) +{ + return weight_a_unnorm(f) * weight_a_gain<subtype<T>>; +} - template <typename T> - KFR_SINTRIN T weight_b_unnorm(T f) - { - const T 
f2 = pow2(f); - const T nom = pow2(12200) * pow3(f); - const T den = (f2 + pow2(20.6)) * (sqrt((f2 + pow2(158.5)))) * (f2 + pow2(12200)); +template <typename T> +KFR_SINTRIN T weight_b_unnorm(T f) +{ + const T f2 = pow2(f); + const T nom = pow2(12200) * pow3(f); + const T den = (f2 + pow2(20.6)) * (sqrt((f2 + pow2(158.5)))) * (f2 + pow2(12200)); - return nom / den; - } + return nom / den; +} - template <typename T> - constexpr static T weight_b_gain = reciprocal(weight_b_unnorm(T(1000.0))); +template <typename T> +constexpr static T weight_b_gain = reciprocal(weight_b_unnorm(T(1000.0))); - template <typename T> - KFR_SINTRIN T bweighting(T f) - { - return weight_b_unnorm(f) * weight_b_gain<subtype<T>>; - } +template <typename T> +KFR_SINTRIN T bweighting(T f) +{ + return weight_b_unnorm(f) * weight_b_gain<subtype<T>>; +} - template <typename T> - KFR_SINTRIN T weight_c_unnorm(T f) - { - const T f2 = pow2(f); - const T nom = pow2(12200) * f2; - const T den = (f2 + pow2(20.6)) * (f2 + pow2(12200)); +template <typename T> +KFR_SINTRIN T weight_c_unnorm(T f) +{ + const T f2 = pow2(f); + const T nom = pow2(12200) * f2; + const T den = (f2 + pow2(20.6)) * (f2 + pow2(12200)); - return nom / den; - } + return nom / den; +} - template <typename T> - constexpr static T weight_c_gain = reciprocal(weight_c_unnorm(T(1000.0))); +template <typename T> +constexpr static T weight_c_gain = reciprocal(weight_c_unnorm(T(1000.0))); - template <typename T> - KFR_SINTRIN T cweighting(T f) - { - return weight_c_unnorm(f) * weight_c_gain<subtype<T>>; - } +template <typename T> +KFR_SINTRIN T cweighting(T f) +{ + return weight_c_unnorm(f) * weight_c_gain<subtype<T>>; +} - template <typename T> - KFR_SINTRIN T aweightingdB(T f) - { - return amp_to_dB(aweighting(f)); - } - template <typename T> - KFR_SINTRIN T bweightingdB(T f) - { - return amp_to_dB(bweighting(f)); - } - template <typename T> - KFR_SINTRIN T cweightingdB(T f) - { - return amp_to_dB(cweighting(f)); - } +template <typename T> 
+KFR_SINTRIN T aweightingdB(T f) +{ + return amp_to_dB(aweighting(f)); +} +template <typename T> +KFR_SINTRIN T bweightingdB(T f) +{ + return amp_to_dB(bweighting(f)); +} +template <typename T> +KFR_SINTRIN T cweightingdB(T f) +{ + return amp_to_dB(cweighting(f)); +} - KFR_SPEC_FN(in_weight, aweighting) - KFR_SPEC_FN(in_weight, bweighting) - KFR_SPEC_FN(in_weight, cweighting) - KFR_SPEC_FN(in_weight, aweightingdB) - KFR_SPEC_FN(in_weight, bweightingdB) - KFR_SPEC_FN(in_weight, cweightingdB) -}; +KFR_FN(aweighting) +KFR_FN(bweighting) +KFR_FN(cweighting) +KFR_FN(aweightingdB) +KFR_FN(bweightingdB) +KFR_FN(cweightingdB) } } diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp @@ -23,16 +23,12 @@ #pragma once #include "../base/log_exp.hpp" +#include "../base/modzerobessel.hpp" #include "../base/sin_cos.hpp" #include "../base/sqrt.hpp" #include "../base/vec.hpp" #include "../expressions/pointer.hpp" -#pragma clang diagnostic push -#if CID_HAS_WARNING("-Winaccessible-base") -#pragma clang diagnostic ignored "-Winaccessible-base" -#endif - namespace kfr { @@ -70,482 +66,406 @@ namespace internal { template <typename T> -constexpr T bessel_coef[] = { T(0.25), - T(0.027777777777777776236), - T(0.0017361111111111110147), - T(6.9444444444444444384e-005), - T(1.9290123456790123911e-006), - T(3.9367598891408417495e-008), - T(6.1511873267825652335e-010), - T(7.5940584281266239246e-012), - T(7.5940584281266233693e-014), - T(6.2760813455591932909e-016), - T(4.3583898233049949985e-018), - T(2.5789288895295827557e-020), - T(1.3157800456783586208e-022), - T(5.8479113141260384983e-025), - T(2.2843403570804837884e-027), - T(7.904291893012054025e-030), - T(2.4395962632753252792e-032), - T(6.75788438580422547e-035), - T(1.689471096451056426e-037), - T(3.8310002187098784929e-040), - T(7.9152897080782616517e-043), - T(1.4962740468957016443e-045), - T(2.5976979980828152196e-048), - T(4.1563167969325041577e-051), - T(6.1483976285983795968e-054), - 
T(8.434015951438105991e-057), - T(1.0757673407446563809e-059), - T(1.2791526049282476926e-062), - T(1.4212806721424974034e-065), - T(1.4789601166935457918e-068), - T(1.4442969889585408123e-071), - T(1.3262598613026086927e-074), - T(1.1472836170437790782e-077), - T(9.3655805472961564331e-081), - T(7.2265282000741942594e-084), - T(5.2786911614858977913e-087), - T(3.6556032974279072401e-090), - T(2.4034209713529963119e-093), - T(1.5021381070956226783e-096) }; - -template <typename T, size_t N> -KFR_INLINE vec<T, N> modzerobessel(vec<T, N> x) -{ - const vec<T, N> x_2 = x * 0.5; - const vec<T, N> x_2_sqr = x_2 * x_2; - vec<T, N> num = x_2_sqr; - vec<T, N> result; - result = 1 + x_2_sqr; - - KFR_LOOP_UNROLL - for (size_t i = 0; i < (sizeof(T) == 4 ? 20 : 39); i++) - { - result = fmadd((num *= x_2_sqr), bessel_coef<T>[i], result); - } - return result; -} +struct window_linspace_0_1 : expression_linspace<T> +{ + window_linspace_0_1(size_t size, window_symmetry symmetry) + : expression_linspace<T>(0, 1, size, symmetry == window_symmetry::symmetric) + { + } +}; + +template <typename T> +struct window_linspace_m1_1 : expression_linspace<T> +{ + window_linspace_m1_1(size_t size, window_symmetry symmetry) + : expression_linspace<T>(-1, 1, size, symmetry == window_symmetry::symmetric) + { + } +}; + +template <typename T> +struct window_linspace_mpi_pi : expression_linspace<T> +{ + window_linspace_mpi_pi(size_t size, window_symmetry symmetry) + : expression_linspace<T>(-c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric) + { + } +}; + +template <typename T> +struct window_linspace_m1_1_trunc : expression_linspace<T> +{ + window_linspace_m1_1_trunc(size_t size, window_symmetry symmetry) + : expression_linspace<T>(-T(size - 1) / size, T(size - 1) / size, size, + symmetry == window_symmetry::symmetric) + { + } +}; + +template <typename T> +struct window_linspace_m1_1_trunc2 : expression_linspace<T> +{ + window_linspace_m1_1_trunc2(size_t size, window_symmetry symmetry) 
+ : expression_linspace<T>(symmetric_linspace, + (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size), size, + symmetry == window_symmetry::symmetric) + { + } +}; -template <cpu_t cpu = cpu_t::native> -struct in_window : in_sin_cos<cpu>, in_log_exp<cpu>, in_select<cpu>, in_sqrt<cpu>, in_abs<cpu> +template <typename T> +struct expression_rectangular : input_expression { + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_rectangular<T>; + expression_rectangular(size_t size, T = T(), window_symmetry = window_symmetry::symmetric) : m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + using UI = utype<U>; + const vec<UI, N> i = enumerate(vec<UI, N>()) + cast<UI>(index); + return select(i < cast<UI>(m_size), U(1), U(0)); + } + size_t size() const { return m_size; } + private: - using in_sin_cos<cpu>::sin; - using in_sin_cos<cpu>::cos; - using in_sin_cos<cpu>::sinc; - using in_log_exp<cpu>::exp; - using in_select<cpu>::select; - using in_sqrt<cpu>::sqrt; - using in_abs<cpu>::abs; - -public: - template <typename T> - struct window_linspace_0_1 : expression_linspace<T> - { - window_linspace_0_1(size_t size, window_symmetry symmetry) - : expression_linspace<T>(0, 1, size, symmetry == window_symmetry::symmetric) - { - } - }; + size_t m_size; +}; - template <typename T> - struct window_linspace_m1_1 : expression_linspace<T> +template <typename T> +struct expression_triangular : input_expression +{ + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_triangular<T>; + expression_triangular(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), m_size(size) { - window_linspace_m1_1(size_t size, window_symmetry symmetry) - : expression_linspace<T>(-1, 1, size, symmetry == window_symmetry::symmetric) - { - } - }; + } + template <typename U, size_t N> + KFR_INLINE vec<U, 
N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + return cast<U>(1 - abs(linspace(cinput, index, y))); + } + size_t size() const { return m_size; } - template <typename T> - struct window_linspace_mpi_pi : expression_linspace<T> +private: + window_linspace_m1_1_trunc2<T> linspace; + size_t m_size; +}; + +template <typename T> +struct expression_bartlett : input_expression +{ + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_bartlett<T>; + expression_bartlett(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), m_size(size) { - window_linspace_mpi_pi(size_t size, window_symmetry symmetry) - : expression_linspace<T>(-c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric) - { - } - }; + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + return cast<U>(1 - abs(linspace(cinput, index, y))); + } + size_t size() const { return m_size; } - template <typename T> - struct window_linspace_m1_1_trunc : expression_linspace<T> +private: + window_linspace_m1_1<T> linspace; + size_t m_size; +}; + +template <typename T> +struct expression_cosine : input_expression +{ + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_cosine<T>; + expression_cosine(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), m_size(size) { - window_linspace_m1_1_trunc(size_t size, window_symmetry symmetry) - : expression_linspace<T>(-T(size - 1) / size, T(size - 1) / size, size, - symmetry == window_symmetry::symmetric) - { - } - }; + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + return cast<U>(sin(c_pi<T> * linspace(cinput, index, y))); + } + size_t size() const { return m_size; } + 
+private: + window_linspace_0_1<T> linspace; + size_t m_size; +}; - template <typename T> - struct window_linspace_m1_1_trunc2 : expression_linspace<T> +template <typename T> +struct expression_hann : input_expression +{ + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_hann<T>; + expression_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), m_size(size) { - window_linspace_m1_1_trunc2(size_t size, window_symmetry symmetry) - : expression_linspace<T>(symmetric_linspace, - (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size), size, - symmetry == window_symmetry::symmetric) - { - } - }; + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + return cast<U>(T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y)))); + } + size_t size() const { return m_size; } - template <typename T> - struct expression_rectangular : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_rectangular<T>; - expression_rectangular(size_t size, T = T(), window_symmetry = window_symmetry::symmetric) - : m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - using UI = utype<U>; - const vec<UI, N> i = enumerate(vec<UI, N>()) + cast<UI>(index); - return select(i < cast<UI>(m_size), U(1), U(0)); - } - size_t size() const { return m_size; } - - private: - size_t m_size; - }; +private: + window_linspace_0_1<T> linspace; + size_t m_size; +}; - template <typename T> - struct expression_triangular : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_triangular<T>; - expression_triangular(size_t size, T = T(), window_symmetry symmetry 
= window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - return cast<U>(1 - abs(linspace(cinput, index, y))); - } - size_t size() const { return m_size; } - - private: - window_linspace_m1_1_trunc2<T> linspace; - size_t m_size; - }; +template <typename T> +struct expression_bartlett_hann : input_expression +{ + using value_type = T; - template <typename T> - struct expression_bartlett : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_bartlett<T>; - expression_bartlett(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - return cast<U>(1 - abs(linspace(cinput, index, y))); - } - size_t size() const { return m_size; } - - private: - window_linspace_m1_1<T> linspace; - size_t m_size; - }; + template <cpu_t newcpu> + using retarget_this = expression_bartlett_hann<T>; - template <typename T> - struct expression_cosine : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_cosine<T>; - expression_cosine(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - return cast<U>(sin(c_pi<T> * linspace(cinput, index, y))); - } - size_t size() const { return m_size; } - - private: - window_linspace_0_1<T> linspace; - size_t m_size; - }; + expression_bartlett_hann(size_t size, T = T(), 
window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + const vec<T, N> xx = linspace(cinput, index, y); + return cast<U>(T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5)))); + } + size_t size() const { return m_size; } - template <typename T> - struct expression_hann : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_hann<T>; - expression_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - return cast<U>(T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y)))); - } - size_t size() const { return m_size; } - - private: - window_linspace_0_1<T> linspace; - size_t m_size; - }; +private: + window_linspace_0_1<T> linspace; + size_t m_size; +}; - template <typename T> - struct expression_bartlett_hann : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_bartlett_hann<T>; - - expression_bartlett_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - const vec<T, N> xx = linspace(cinput, index, y); - return cast<U>(T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5)))); - } - size_t size() const { return m_size; } - - private: - window_linspace_0_1<T> linspace; - size_t m_size; - }; 
+template <typename T> +struct expression_hamming : input_expression +{ + using value_type = T; - template <typename T> - struct expression_hamming : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_hamming<T>; - expression_hamming(size_t size, T alpha = 0.54, window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), alpha(alpha), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - return cast<U>(alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y)))); - } - size_t size() const { return m_size; } - - private: - window_linspace_0_1<T> linspace; - T alpha; - size_t m_size; - }; + template <cpu_t newcpu> + using retarget_this = expression_hamming<T>; + expression_hamming(size_t size, T alpha = 0.54, window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), alpha(alpha), m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + return cast<U>(alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y)))); + } + size_t size() const { return m_size; } - template <typename T> - struct expression_bohman : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_bohman<T>; - expression_bohman(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - const vec<U, N> n = abs(linspace(cinput, index, y)); - return cast<U>((T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n)); 
- } - size_t size() const { return m_size; } - - private: - window_linspace_m1_1<T> linspace; - size_t m_size; - }; +private: + window_linspace_0_1<T> linspace; + T alpha; + size_t m_size; +}; - template <typename T> - struct expression_blackman : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_blackman<T>; - expression_blackman(size_t size, T alpha = 0.16, - window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - const vec<T, N> n = linspace(cinput, index, y); - return cast<U>(a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n)); - } - size_t size() const { return m_size; } - - private: - window_linspace_0_1<T> linspace; - T a0, a1, a2; - size_t m_size; - }; +template <typename T> +struct expression_bohman : input_expression +{ + using value_type = T; - template <typename T> - struct expression_blackman_harris : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_blackman_harris<T>; - expression_blackman_harris(size_t size, T = T(), - window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>; - - return cast<U>(T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - - T(0.01168) * cos(3 * n)); - } - size_t size() const { return m_size; } - - private: - window_linspace_0_1<T> linspace; - size_t m_size; - }; + template <cpu_t newcpu> + using retarget_this = expression_bohman<T>; + 
expression_bohman(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + const vec<U, N> n = abs(linspace(cinput, index, y)); + return cast<U>((T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n)); + } + size_t size() const { return m_size; } - template <typename T> - struct expression_kaiser : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_kaiser<T>; - expression_kaiser(size_t size, T beta = 0.5, window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), beta(beta), m(reciprocal(modzerobessel(make_vector(beta))[0])), - m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - return cast<U>(modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m); - } - size_t size() const { return m_size; } - - private: - window_linspace_m1_1<T> linspace; - T beta; - T m; - size_t m_size; - }; +private: + window_linspace_m1_1<T> linspace; + size_t m_size; +}; - template <typename T> - struct expression_flattop : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_flattop<T>; - expression_flattop(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>; - constexpr T a0 = 1; - constexpr T a1 = 1.93; - constexpr T a2 = 1.29; - constexpr T a3 = 0.388; 
- constexpr T a4 = 0.028; - return cast<U>(a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n)); - } - size_t size() const { return m_size; } - - private: - window_linspace_0_1<T> linspace; - size_t m_size; - }; +template <typename T> +struct expression_blackman : input_expression +{ + using value_type = T; - template <typename T> - struct expression_gaussian : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_gaussian<T>; - - expression_gaussian(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), alpha(alpha), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - return cast<U>(exp(-0.5 * sqr(alpha * linspace(cinput, index, y)))); - } - - size_t size() const { return m_size; } - private: - window_linspace_m1_1_trunc<T> linspace; - T alpha; - size_t m_size; - }; + template <cpu_t newcpu> + using retarget_this = expression_blackman<T>; + expression_blackman(size_t size, T alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5), m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + const vec<T, N> n = linspace(cinput, index, y); + return cast<U>(a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n)); + } + size_t size() const { return m_size; } - template <typename T> - struct expression_lanczos : input_expression - { - using value_type = T; - - template <cpu_t newcpu> - using retarget_this = typename in_window<newcpu>::template expression_lanczos<T>; - expression_lanczos(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, 
symmetry), alpha(alpha), m_size(size) - { - } - template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const - { - constexpr vec_t<T, N> y{}; - return cast<U>(sinc(linspace(cinput, index, y))); - } - size_t size() const { return m_size; } - - private: - window_linspace_mpi_pi<T> linspace; - T alpha; - size_t m_size; - }; +private: + window_linspace_0_1<T> linspace; + T a0, a1, a2; + size_t m_size; +}; + +template <typename T> +struct expression_blackman_harris : input_expression +{ + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_blackman_harris<T>; + expression_blackman_harris(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>; + + return cast<U>(T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n)); + } + size_t size() const { return m_size; } + +private: + window_linspace_0_1<T> linspace; + size_t m_size; +}; + +template <typename T> +struct expression_kaiser : input_expression +{ + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_kaiser<T>; + expression_kaiser(size_t size, T beta = 0.5, window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), beta(beta), m(reciprocal(modzerobessel(make_vector(beta))[0])), + m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + return cast<U>(modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m); + } + size_t size() const { return m_size; } + +private: + window_linspace_m1_1<T> linspace; + T beta; + T m; + size_t m_size; +}; + +template <typename 
T> +struct expression_flattop : input_expression +{ + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_flattop<T>; + expression_flattop(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>; + constexpr T a0 = 1; + constexpr T a1 = 1.93; + constexpr T a2 = 1.29; + constexpr T a3 = 0.388; + constexpr T a4 = 0.028; + return cast<U>(a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n)); + } + size_t size() const { return m_size; } + +private: + window_linspace_0_1<T> linspace; + size_t m_size; +}; + +template <typename T> +struct expression_gaussian : input_expression +{ + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_gaussian<T>; + + expression_gaussian(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), alpha(alpha), m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr vec_t<T, N> y{}; + return cast<U>(exp(-0.5 * sqr(alpha * linspace(cinput, index, y)))); + } + + size_t size() const { return m_size; } +private: + window_linspace_m1_1_trunc<T> linspace; + T alpha; + size_t m_size; +}; + +template <typename T> +struct expression_lanczos : input_expression +{ + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = expression_lanczos<T>; + expression_lanczos(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric) + : linspace(size, symmetry), alpha(alpha), m_size(size) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + constexpr 
vec_t<T, N> y{}; + return cast<U>(sinc(linspace(cinput, index, y))); + } + size_t size() const { return m_size; } + +private: + window_linspace_mpi_pi<T> linspace; + T alpha; + size_t m_size; }; template <window_type> @@ -556,7 +476,7 @@ struct window_by_type; struct window_by_type<window_type::win> \ { \ template <typename T> \ - using type = in_window<>::expression_##win<T>; \ + using type = expression_##win<T>; \ }; KFR_WINDOW_BY_TYPE(rectangular) KFR_WINDOW_BY_TYPE(triangular) @@ -574,83 +494,80 @@ KFR_WINDOW_BY_TYPE(gaussian) KFR_WINDOW_BY_TYPE(lanczos) } -KFR_INLINE internal::in_window<>::expression_rectangular<fbase> window_rectangular(size_t size) +KFR_INLINE internal::expression_rectangular<fbase> window_rectangular(size_t size) { - return internal::in_window<>::expression_rectangular<fbase>(size, fbase()); + return internal::expression_rectangular<fbase>(size, fbase()); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_triangular<T> window_triangular(size_t size, - ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_triangular<T> window_triangular(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_triangular<T>(size); + return internal::expression_triangular<T>(size); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_bartlett<T> window_bartlett(size_t size, - ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_bartlett<T> window_bartlett(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_bartlett<T>(size); + return internal::expression_bartlett<T>(size); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_cosine<T>(size); + return internal::expression_cosine<T>(size); } template <typename T 
= fbase> -KFR_INLINE internal::in_window<>::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_hann<T>(size); + return internal::expression_hann<T>(size); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_bartlett_hann<T> window_bartlett_hann(size_t size, - ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_bartlett_hann<T> window_bartlett_hann(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_bartlett_hann<T>(size); + return internal::expression_bartlett_hann<T>(size); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_hamming<T> window_hamming(size_t size, T alpha = 0.54, - ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_hamming<T> window_hamming(size_t size, T alpha = 0.54, + ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_hamming<T>(size, alpha); + return internal::expression_hamming<T>(size, alpha); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_bohman<T>(size); + return internal::expression_bohman<T>(size); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_blackman<T> window_blackman( +KFR_INLINE internal::expression_blackman<T> window_blackman( size_t size, T alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_blackman<T>(size, alpha, symmetry); + return internal::expression_blackman<T>(size, alpha, symmetry); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_blackman_harris<T> 
window_blackman_harris( +KFR_INLINE internal::expression_blackman_harris<T> window_blackman_harris( size_t size, window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_blackman_harris<T>(size, T(), symmetry); + return internal::expression_blackman_harris<T>(size, T(), symmetry); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_kaiser<T> window_kaiser(size_t size, T beta = T(0.5), - ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_kaiser<T> window_kaiser(size_t size, T beta = T(0.5), + ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_kaiser<T>(size, beta); + return internal::expression_kaiser<T>(size, beta); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_flattop<T>(size); + return internal::expression_flattop<T>(size); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_gaussian<T> window_gaussian(size_t size, T alpha = 2.5, - ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_gaussian<T> window_gaussian(size_t size, T alpha = 2.5, + ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_gaussian<T>(size, alpha); + return internal::expression_gaussian<T>(size, alpha); } template <typename T = fbase> -KFR_INLINE internal::in_window<>::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_INLINE internal::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::in_window<>::expression_lanczos<T>(size); + return internal::expression_lanczos<T>(size); } template <typename T = fbase, window_type type, @@ -681,5 +598,3 @@ KFR_NOINLINE expression_pointer<T> window(size_t size, 
window_type type, T win_p fn_returns<expression_pointer<T>>()); } } - -#pragma clang diagnostic pop diff --git a/include/kfr/expressions/reduce.hpp b/include/kfr/expressions/reduce.hpp @@ -34,192 +34,99 @@ namespace kfr template <typename T> KFR_INLINE T final_mean(T value, size_t size) { - return value / size; + return value / T(size); } KFR_FN(final_mean) template <typename T> KFR_INLINE T final_rootmean(T value, size_t size) { - return internal::builtin_sqrt(value / size); + return internal::builtin_sqrt(value / T(size)); } KFR_FN(final_rootmean) namespace internal { -template <typename FinalFn, typename T, KFR_ENABLE_IF(is_callable<FinalFn, size_t, T>::value)> +template <typename FinalFn, typename T, KFR_ENABLE_IF(is_callable<FinalFn, T, size_t>::value)> KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t size, T value) { return finalfn(value, size); } -template <typename FinalFn, typename T, KFR_ENABLE_IF(!is_callable<FinalFn, size_t, T>::value)> +template <typename FinalFn, typename T, KFR_ENABLE_IF(!is_callable<FinalFn, T, size_t>::value)> KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t, T value) { return finalfn(value); } -template <cpu_t cpu = cpu_t::native> -struct in_reduce +template <typename T, typename ReduceFn, typename TransformFn, typename FinalFn, cpu_t cpu = cpu_t::native> +struct expression_reduce : output_expression { + constexpr static size_t width = vector_width<T, cpu> * bitness_const(1, 2); - template <typename T, typename ReduceFn, typename TransformFn, typename FinalFn> - struct expression_reduce : output_expression + expression_reduce(ReduceFn&& reducefn, TransformFn&& transformfn, FinalFn&& finalfn) + : counter(0), reducefn(std::move(reducefn)), transformfn(std::move(transformfn)), + finalfn(std::move(finalfn)), value(resize<width>(make_vector(reducefn(initialvalue<T>{})))) { - using Tsubtype = subtype<T>; - constexpr static size_t width = vector_width<Tsubtype, cpu> * bitness_const(1, 2); - - 
expression_reduce(ReduceFn&& reducefn, TransformFn&& transformfn, FinalFn&& finalfn) - : counter(0), reducefn(std::move(reducefn)), transformfn(std::move(transformfn)), - finalfn(std::move(finalfn)), value(resize<width>(make_vector(reducefn(initialvalue<T>{})))) - { - } - - template <typename U, size_t N> - KFR_INLINE void operator()(coutput_t, size_t, vec<U, N> x) const - { - counter += N; - process(x); - } - - KFR_INLINE T get() - { - return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn)); - } - - protected: - void reset() { counter = 0; } - template <size_t N, KFR_ENABLE_IF(N == width)> - KFR_INLINE void process(vec<Tsubtype, N> x) const - { - value = reducefn(transformfn(x), value); - } - - template <size_t N, KFR_ENABLE_IF(N < width)> - KFR_INLINE void process(vec<Tsubtype, N> x) const - { - value = combine(value, reducefn(transformfn(x), narrow<N>(value))); - } - - template <size_t N, KFR_ENABLE_IF(N > width)> - KFR_INLINE void process(vec<Tsubtype, N> x) const - { - process(low(x)); - process(high(x)); - } - - mutable size_t counter; - retarget<ReduceFn, cpu> reducefn; - retarget<TransformFn, cpu> transformfn; - retarget<FinalFn, cpu> finalfn; - mutable vec<Tsubtype, width> value; - }; - - template <typename ReduceFn, typename TransformFn = fn_pass_through, typename FinalFn = fn_pass_through, - typename E1, typename T = value_type_of<E1>> - KFR_SINTRIN T reduce(E1&& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn_pass_through(), - FinalFn&& finalfn = fn_pass_through()) - { - static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - const size_t size = e1.size(); - using reducer_t = expression_reduce<T, decay<ReduceFn>, decay<TransformFn>, decay<FinalFn>>; - reducer_t red(std::forward<ReduceFn>(reducefn), std::forward<TransformFn>(transformfn), - std::forward<FinalFn>(finalfn)); - process<T, cpu>(red, 
std::forward<E1>(e1), size); - - return red.get(); - } - - template <typename E1, typename T = value_type_of<E1>> - KFR_SINTRIN T sum(E1&& x) - { - static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return reduce(std::forward<E1>(x), fn_add()); } - template <typename E1, typename T = value_type_of<E1>> - KFR_SINTRIN T mean(E1&& x) + template <typename U, size_t N> + KFR_INLINE void operator()(coutput_t, size_t, vec<U, N> x) const { - static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return reduce(std::forward<E1>(x), fn_add(), fn_pass_through(), fn_final_mean()); + counter += N; + process(x); } - template <typename E1, typename T = value_type_of<E1>> - KFR_SINTRIN T min(E1&& x) - { - using fn_min = typename in_min_max<cpu>::fn_min; - static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return reduce(std::forward<E1>(x), fn_min()); - } + KFR_INLINE T get() { return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn)); } - template <typename E1, typename T = value_type_of<E1>> - KFR_SINTRIN T max(E1&& x) - { - using fn_max = typename in_min_max<cpu>::fn_max; - static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return reduce(std::forward<E1>(x), fn_max()); - } +protected: + void reset() { counter = 0; } + KFR_INLINE void process(vec<T, width> x) const { value = reducefn(transformfn(x), value); } - template <typename E1, typename E2, - typename T = value_type_of<decltype(std::declval<E1>() * std::declval<E2>())>> - KFR_SINTRIN T 
dotproduct(E1&& x, E2&& y) + template <size_t N, KFR_ENABLE_IF(N < width)> + KFR_INLINE void process(vec<T, N> x) const { - auto m = std::forward<E1>(x) * std::forward<E2>(y); - using E12 = decltype(m); - static_assert(!is_generic<E12>::value, "e1 * e2 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E12>::value, "e1 * e2 must be a sized expression (use typed<T>())"); - return reduce(std::move(m), fn_add()); + value = combine(value, reducefn(transformfn(x), narrow<N>(value))); } - template <typename E1, typename T = value_type_of<E1>> - KFR_SINTRIN T rms(E1&& x) + template <size_t N, KFR_ENABLE_IF(N > width)> + KFR_INLINE void process(vec<T, N> x) const { - static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return reduce(std::forward<E1>(x), fn_add(), fn_sqr(), fn_final_rootmean()); + process(low(x)); + process(high(x)); } - template <typename E1, typename T = value_type_of<E1>> - KFR_SINTRIN T sumsqr(E1&& x) - { - static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return reduce(std::forward<E1>(x), fn_add(), fn_sqr()); - } + mutable size_t counter; + retarget<ReduceFn, cpu> reducefn; + retarget<TransformFn, cpu> transformfn; + retarget<FinalFn, cpu> finalfn; + mutable vec<T, width> value; +}; - template <typename E1, typename T = value_type_of<E1>> - KFR_SINTRIN T product(E1&& x) - { - static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return reduce(std::forward<E1>(x), fn_mul()); - } +template <typename ReduceFn, typename TransformFn = fn_pass_through, typename FinalFn = fn_pass_through, + typename E1, typename T = value_type_of<E1>> +KFR_SINTRIN T 
reduce(E1&& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn_pass_through(), + FinalFn&& finalfn = fn_pass_through()) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + const size_t size = e1.size(); + using reducer_t = expression_reduce<T, decay<ReduceFn>, decay<TransformFn>, decay<FinalFn>>; + reducer_t red(std::forward<ReduceFn>(reducefn), std::forward<TransformFn>(transformfn), + std::forward<FinalFn>(finalfn)); + process<T>(red, std::forward<E1>(e1), size); - KFR_SPEC_FN(in_reduce, reduce) - KFR_SPEC_FN(in_reduce, sum) - KFR_SPEC_FN(in_reduce, dotproduct) - KFR_SPEC_FN(in_reduce, rms) - KFR_SPEC_FN(in_reduce, sumsqr) - KFR_SPEC_FN(in_reduce, mean) - KFR_SPEC_FN(in_reduce, min) - KFR_SPEC_FN(in_reduce, max) - KFR_SPEC_FN(in_reduce, product) -}; + return red.get(); } -namespace native -{ +KFR_FN(reduce) +} template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> KFR_SINTRIN T sum(E1&& x) { static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return internal::in_reduce<>::sum(std::forward<E1>(x)); + return internal::reduce(std::forward<E1>(x), fn_add()); } template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> @@ -227,32 +134,50 @@ KFR_SINTRIN T mean(E1&& x) { static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return internal::in_reduce<>::mean(std::forward<E1>(x)); + return internal::reduce(std::forward<E1>(x), fn_add(), fn_pass_through(), fn_final_mean()); } template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_SINTRIN T 
max(E1&& x) +KFR_SINTRIN T minof(E1&& x) { static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return internal::in_reduce<>::max(std::forward<E1>(x)); + return internal::reduce(std::forward<E1>(x), internal::fn_min()); } template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_SINTRIN T min(E1&& x) +KFR_SINTRIN T maxof(E1&& x) { static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return internal::in_reduce<>::min(std::forward<E1>(x)); + return internal::reduce(std::forward<E1>(x), internal::fn_max()); +} + +template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_SINTRIN T absminof(E1&& x) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return internal::reduce(std::forward<E1>(x), internal::fn_absmin()); +} + +template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_SINTRIN T absmaxof(E1&& x) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return internal::reduce(std::forward<E1>(x), internal::fn_absmax()); } template <typename E1, typename E2, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> KFR_SINTRIN T dotproduct(E1&& x, E2&& y) { - static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); - static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return 
internal::in_reduce<>::dotproduct(std::forward<E1>(x), std::forward<E2>(y)); + auto m = std::forward<E1>(x) * std::forward<E2>(y); + using E12 = decltype(m); + static_assert(!is_generic<E12>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E12>::value, "e1 must be a sized expression (use typed<T>())"); + return internal::reduce(std::move(m), fn_add()); } template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> @@ -260,7 +185,7 @@ KFR_SINTRIN T rms(E1&& x) { static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return internal::in_reduce<>::rms(std::forward<E1>(x)); + return internal::reduce(std::forward<E1>(x), fn_add(), fn_sqr(), fn_final_rootmean()); } template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> @@ -268,7 +193,7 @@ KFR_SINTRIN T sumsqr(E1&& x) { static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return internal::in_reduce<>::sumsqr(std::forward<E1>(x)); + return internal::reduce(std::forward<E1>(x), fn_add(), fn_sqr()); } template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> @@ -276,7 +201,6 @@ KFR_SINTRIN T product(E1&& x) { static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); - return internal::in_reduce<>::product(std::forward<E1>(x)); -} + return internal::reduce(std::forward<E1>(x), fn_mul()); } } diff --git a/include/kfr/math.hpp b/include/kfr/math.hpp @@ -45,7 +45,3 @@ #include "base/sqrt.hpp" #include "base/tan.hpp" -namespace kfr -{ -using namespace native; -} diff --git a/sources.cmake 
b/sources.cmake @@ -20,6 +20,7 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/base/abs.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/asin_acos.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/atan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/clamp.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/complex.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/constants.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/digitreverse.hpp @@ -31,6 +32,7 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/base/logical.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/min_max.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/modzerobessel.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/operators.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/read_write.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/round.hpp @@ -71,7 +73,6 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/expressions/basic.hpp ${PROJECT_SOURCE_DIR}/include/kfr/expressions/conversion.hpp ${PROJECT_SOURCE_DIR}/include/kfr/expressions/generators.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/expressions/operators.hpp ${PROJECT_SOURCE_DIR}/include/kfr/expressions/pointer.hpp ${PROJECT_SOURCE_DIR}/include/kfr/expressions/reduce.hpp ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp diff --git a/tests/basic_vector_test.cpp b/tests/basic_vector_test.cpp @@ -11,142 +11,97 @@ #include <kfr/vec.hpp> #include <kfr/version.hpp> +#include "testo/testo.hpp" + using namespace kfr; -using namespace kfr::native; -template <typename T> -void print_type(const T& value) +TEST(test) { - println(type_name(value), ":"); - println(value); -} - -int main(int /*argc*/, char** /*argv*/) -{ - println(library_version()); - // >>> KFR ... 
- // How to make a vector: // * Use constructor const vec<double, 4> first{ 1, 2.5, -infinity, 3.1415926 }; - print_type(first); - // >>> kfr::vec<double, 4>: - // >>> 1 2.5 -inf 3.14159 + CHECK(first == vec<double, 4>{ 1, 2.5, -infinity, 3.1415926 }); // * Use make_vector function const auto second = make_vector(-1, +1); - print_type(second); - // >>> kfr::vec<int, 2>: - // >>> -1 1 + CHECK(second == vec<int, 2>{ -1, 1 }); // * Convert from vector of other type: const vec<int, 4> int_vector{ 10, 20, 30, 40 }; const vec<double, 4> double_vector = cast<double>(int_vector); - print_type(double_vector); - // >>> kfr::vec<double, 4>: - // >>> 10 20 30 40 + CHECK(double_vector == vec<double, 4>{ 10, 20, 30, 40 }); // * Concat two vectors: const vec<int, 1> left_part{ 1 }; const vec<int, 1> right_part{ 2 }; const vec<int, 2> pair{ left_part, right_part }; - print_type(pair); - // >>> kfr::vec<int, 2>: - // >>> 1 2 + CHECK(pair == vec<int, 2>{ 1, 2 }); // * Same, but using make_vector and concat: const vec<int, 2> pair2 = concat(make_vector(10), make_vector(20)); - print_type(pair2); - // >>> kfr::vec<int, 2>: - // >>> 10 20 + CHECK(pair2 == vec<int, 2>{ 10, 20 }); // * Repeat vector multiple times: const vec<short, 8> repeated = repeat<4>(make_vector<short>(0, -1)); - print_type(repeated); - // >>> kfr::vec<short, 8>: - // >>> 0 -1 0 -1 0 -1 0 -1 + CHECK(repeated == vec<short, 8>{ 0, -1, 0, -1, 0, -1, 0, -1 }); // * Use enumerate to generate sequence of numbers: const vec<int, 8> eight = enumerate<int, 8>(); - print_type(eight); - // >>> kfr::vec<int, 8>: - // >>> 0 1 2 3 4 5 6 7 + CHECK(eight == vec<int, 8>{ 0, 1, 2, 3, 4, 5, 6, 7 }); // * Vectors can be of any length... 
const vec<int, 1> one{ 42 }; const vec<int, 2> two = concat(one, make_vector(42)); - print_type(two); - // >>> kfr::vec<int, 2>: - // >>> 42 42 + CHECK(two == vec<int, 2>{ 42, 42 }); const vec<u8, 256> very_long_vector = repeat<64>(make_vector<u8>(1, 2, 4, 8)); - print_type(slice<0, 17>(very_long_vector)); - // >>> kfr::vec<unsigned char, 17>: - // >>> 1 2 4 8 1 2 4 8 - // >>> 1 2 4 8 1 2 4 8 - // >>> 1 + CHECK(slice<0, 17>(very_long_vector) == + vec<unsigned char, 17>{ 1, 2, 4, 8, 1, 2, 4, 8, 1, 2, 4, 8, 1, 2, 4, 8, 1 }); // * ...really any: using big_vector = vec<i16, 107>; big_vector v107 = enumerate<i16, 107>(); - print_type(hadd(v107)); - // >>> short: - // >>> 5671 + CHECK(hadd(v107) == static_cast<short>(5671)); using color = vec<u8, 3>; const color green = cast<u8>(make_vector(0.0, 1.0, 0.0) * 255); - print_type(green); - // >>> kfr::vec<unsigned char, 3>: - // >>> 0 255 0 + CHECK(green == vec<unsigned char, 3>{ 0, 255, 0 }); // Vectors support all standard operators: const auto op1 = make_vector(0, 1, 10, 100); const auto op2 = make_vector(20, 2, -2, 200); const auto result = op1 * op2 - 4; - print_type(result); - // >>> kfr::vec<int, 4>: - // >>> -4 -2 -24 19996 + CHECK(result == vec<int, 4>{ -4, -2, -24, 19996 }); // * Transform vector: const vec<int, 8> numbers1 = enumerate<int, 8>(); const vec<int, 8> numbers2 = enumerate<int, 8>() + 100; - print_type(odd(numbers1)); - print_type(even(numbers2)); - // >>> kfr::vec<int, 4>: - // >>> 1 3 5 7 - // >>> kfr::vec<int, 4>: - // >>> 100 102 104 106 + CHECK(odd(numbers1) == vec<int, 4>{ 1, 3, 5, 7 }); + CHECK(even(numbers2) == vec<int, 4>{ 100, 102, 104, 106 }); // * The following command pairs are equivalent: - print_type(permute<0, 2, 1, 3, 4, 6, 5, 7>(numbers1)); - print_type(permute<0, 2, 1, 3>(numbers1)); - // >>> kfr::vec<int, 8>: - // >>> 0 2 1 3 4 6 5 7 - // >>> kfr::vec<int, 8>: - // >>> 0 2 1 3 4 6 5 7 - - print_type(shuffle<0, 8, 2, 10, 4, 12, 6, 14>(numbers1, numbers2)); - print_type(shuffle<0, 
8>(numbers1, numbers2)); - // >>> kfr::vec<int, 8>: - // >>> 0 100 2 102 4 104 6 106 - // >>> kfr::vec<int, 8>: - // >>> 0 100 2 102 4 104 6 106 - - print_type(blend<0, 1, 1, 0, 1, 1, 0, 1>(numbers1, numbers2)); - print_type(blend<0, 1, 1>(numbers1, numbers2)); - // >>> kfr::vec<int, 8>: - // >>> 0 101 102 3 104 105 6 107 - // >>> kfr::vec<int, 8>: - // >>> 0 101 102 3 104 105 6 107 + CHECK(permute(numbers1, elements<0, 2, 1, 3, 4, 6, 5, 7>) == vec<int, 8>{ 0, 2, 1, 3, 4, 6, 5, 7 }); + CHECK(permute(numbers1, elements<0, 2, 1, 3>) == vec<int, 8>{ 0, 2, 1, 3, 4, 6, 5, 7 }); + + CHECK(shuffle(numbers1, numbers2, elements<0, 8, 2, 10, 4, 12, 6, 14>) == + vec<int, 8>{ 0, 100, 2, 102, 4, 104, 6, 106 }); + CHECK(shuffle(numbers1, numbers2, elements<0, 8>) == vec<int, 8>{ 0, 100, 2, 102, 4, 104, 6, 106 }); + + CHECK(blend(numbers1, numbers2, elements<0, 1, 1, 0, 1, 1, 0, 1>) == + vec<int, 8>{ 0, 101, 102, 3, 104, 105, 6, 107 }); + CHECK(blend(numbers1, numbers2, elements<0, 1, 1>) == + vec<int, 8>{ 0, 101, 102, 3, 104, 105, 6, 107 }); // * Transpose matrix: const auto sixteen = enumerate<float, 16>(); - print_type(transpose<4>(sixteen)); - // >>> kfr::vec<float, 16>: - // >>> 0 4 8 12 1 5 9 13 - // >>> 2 6 10 14 3 7 11 15 - // >>> + CHECK(transpose<4>(sixteen) == + vec<float, 16>{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }); +} + +int main(int /*argc*/, char** /*argv*/) +{ + println(library_version()); - return 0; + return testo::run_all("", true); } diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp @@ -10,7 +10,6 @@ #include <kfr/base/complex.hpp> #include <kfr/cometa/string.hpp> #include <kfr/expressions/basic.hpp> -#include <kfr/expressions/operators.hpp> #include <kfr/expressions/reduce.hpp> #include <kfr/math.hpp> #include <kfr/version.hpp> @@ -23,6 +22,7 @@ void assert_is_same() static_assert(std::is_same<T1, T2>::value, ""); } + TEST(complex_vector) { const vec<c32, 1> c32x1{ c32{ 0, 1 } }; @@ -179,6 +179,9 @@ int main(int argc, char** argv) 
static_assert(vector_width<i32, cpu_t::sse2> == 4, ""); static_assert(vector_width<complex<i32>, cpu_t::sse2> == 2, ""); + static_assert(is_numeric<vec<complex<float>, 4>>::value, ""); + static_assert(is_numeric_args<vec<complex<float>, 4>>::value, ""); + static_assert(sizeof(vec<c32, 4>) == sizeof(vec<f32, 8>), ""); static_assert(vec<f32, 4>::size() == 4, ""); static_assert(vec<c32, 4>::size() == 4, ""); diff --git a/tests/conv_test.cpp b/tests/conv_test.cpp @@ -23,7 +23,7 @@ TEST(test_convolve) univector<double, 5> b({ 0.25, 0.5, 1.0, 0.5, 0.25 }); univector<double> c = convolve(a, b); CHECK(c.size() == 9); - CHECK(native::rms(c - univector<double>({ 0.25, 1., 2.75, 5., 7.5, 8.5, 7.75, 3.5, 1.25 })) < 0.0001); + CHECK(rms(c - univector<double>({ 0.25, 1., 2.75, 5., 7.5, 8.5, 7.75, 3.5, 1.25 })) < 0.0001); } int main(int argc, char** argv) diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp @@ -14,7 +14,6 @@ #include <kfr/dft/fft.hpp> #include <kfr/dft/reference_dft.hpp> #include <kfr/expressions/basic.hpp> -#include <kfr/expressions/operators.hpp> #include <kfr/expressions/reduce.hpp> #include <kfr/io/tostring.hpp> #include <kfr/math.hpp> diff --git a/tests/empty_test.cpp b/tests/empty_test.cpp @@ -1,7 +1,5 @@ #include <kfr/math.hpp> -#include <kfr/vec.hpp> using namespace kfr; -using namespace kfr::native; int main(int argc, char** argv) { return 0; } diff --git a/tests/fracdelay_test.cpp b/tests/fracdelay_test.cpp @@ -20,17 +20,17 @@ using namespace kfr; TEST(test_fracdelay) { univector<double, 5> a({ 1, 2, 3, 4, 5 }); - univector<double, 5> b = native::fracdelay(a, 0.5); - CHECK(native::rms(b - univector<double>({ 0.5, 1.5, 2.5, 3.5, 4.5 })) < c_epsilon<double> * 5); + univector<double, 5> b = fracdelay(a, 0.5); + CHECK(rms(b - univector<double>({ 0.5, 1.5, 2.5, 3.5, 4.5 })) < c_epsilon<double> * 5); - b = native::fracdelay(a, 0.1); - CHECK(native::rms(b - univector<double>({ 0.9, 1.9, 2.9, 3.9, 4.9 })) < c_epsilon<double> * 5); + b = fracdelay(a, 0.1); + 
CHECK(rms(b - univector<double>({ 0.9, 1.9, 2.9, 3.9, 4.9 })) < c_epsilon<double> * 5); - b = native::fracdelay(a, 0.0); - CHECK(native::rms(b - univector<double>({ 1, 2, 3, 4, 5 })) < c_epsilon<double> * 5); + b = fracdelay(a, 0.0); + CHECK(rms(b - univector<double>({ 1, 2, 3, 4, 5 })) < c_epsilon<double> * 5); - b = native::fracdelay(a, 1.0); - CHECK(native::rms(b - univector<double>({ 0, 1, 2, 3, 4 })) < c_epsilon<double> * 5); + b = fracdelay(a, 1.0); + CHECK(rms(b - univector<double>({ 0, 1, 2, 3, 4 })) < c_epsilon<double> * 5); } int main(int argc, char** argv) diff --git a/tests/stat_test.cpp b/tests/stat_test.cpp @@ -20,32 +20,32 @@ TEST(test_stat) { { univector<float, 5> a({ 1, 2, 3, 4, 5 }); - CHECK(native::sum(a) == 15); - CHECK(native::mean(a) == 3); - CHECK(native::min(a) == 1); - CHECK(native::max(a) == 5); - CHECK(native::sumsqr(a) == 55); - CHECK(native::rms(a) == 3.316624790355399849115f); - CHECK(native::product(a) == 120); + CHECK(sum(a) == 15); + CHECK(mean(a) == 3); + CHECK(minof(a) == 1); + CHECK(maxof(a) == 5); + CHECK(sumsqr(a) == 55); + CHECK(rms(a) == 3.316624790355399849115f); + CHECK(product(a) == 120); } { univector<double, 5> a({ 1, 2, 3, 4, 5 }); - CHECK(native::sum(a) == 15); - CHECK(native::mean(a) == 3); - CHECK(native::min(a) == 1); - CHECK(native::max(a) == 5); - CHECK(native::sumsqr(a) == 55); - CHECK(native::rms(a) == 3.316624790355399849115); - CHECK(native::product(a) == 120); + CHECK(sum(a) == 15); + CHECK(mean(a) == 3); + CHECK(minof(a) == 1); + CHECK(maxof(a) == 5); + CHECK(sumsqr(a) == 55); + CHECK(rms(a) == 3.316624790355399849115); + CHECK(product(a) == 120); } { univector<int, 5> a({ 1, 2, 3, 4, 5 }); - CHECK(native::sum(a) == 15); - CHECK(native::mean(a) == 3); - CHECK(native::min(a) == 1); - CHECK(native::max(a) == 5); - CHECK(native::sumsqr(a) == 55); - CHECK(native::product(a) == 120); + CHECK(sum(a) == 15); + CHECK(mean(a) == 3); + CHECK(minof(a) == 1); + CHECK(maxof(a) == 5); + CHECK(sumsqr(a) == 55); + 
CHECK(product(a) == 120); } }