commit 3945a8f00b5062cc698afda300a07fddcdf3ba3a
parent 6ff1acd1d2216a0655755c48a805cb14c130c150
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date: Mon, 25 Jul 2016 13:28:15 +0300
Refactoring: Improve compilation time
Diffstat:
44 files changed, 3271 insertions(+), 4444 deletions(-)
diff --git a/examples/biquads.cpp b/examples/biquads.cpp
@@ -25,13 +25,11 @@
#include <kfr/io/python_plot.hpp>
using namespace kfr;
-using namespace kfr::native;
int main(int argc, char** argv)
{
println(library_version());
- using namespace native;
const std::string options = "phaseresp=True";
univector<double, 128> output;
diff --git a/examples/dft.cpp b/examples/dft.cpp
@@ -17,7 +17,6 @@
#include <kfr/dsp/oscillators.hpp>
#include <kfr/dsp/units.hpp>
#include <kfr/expressions/basic.hpp>
-#include <kfr/expressions/operators.hpp>
#include <kfr/expressions/reduce.hpp>
#include <kfr/math.hpp>
#include <kfr/misc/random.hpp>
@@ -52,8 +51,8 @@ int main(int argc, char** argv)
// get magnitude and convert to decibels
univector<float_type, size> dB = amp_to_dB(cabs(out));
- println("max = ", max(dB));
- println("min = ", min(dB));
+ println("max = ", maxof(dB));
+ println("min = ", minof(dB));
println("mean = ", mean(dB));
println("rms = ", rms(dB));
diff --git a/examples/fir.cpp b/examples/fir.cpp
@@ -30,13 +30,11 @@
#include <iostream>
using namespace kfr;
-using namespace kfr::native;
int main(int argc, char** argv)
{
println(library_version());
- using namespace native;
const std::string options = "phaseresp=False";
univector<double, 15> taps15;
diff --git a/examples/resampling.cpp b/examples/resampling.cpp
@@ -21,16 +21,12 @@
// swept
#include <kfr/dsp/oscillators.hpp>
-// operator overloading for expressions
-#include <kfr/expressions/operators.hpp>
-
// plot_save()
#include <kfr/io/python_plot.hpp>
#include <iostream>
using namespace kfr;
-using namespace kfr::native;
constexpr size_t input_sr = 96000;
constexpr size_t output_sr = 44100;
@@ -41,7 +37,6 @@ int main(int argc, char** argv)
{
println(library_version());
- using namespace native;
const std::string options = "phaseresp=False";
univector<f64> swept_sine = swept(0.5, len);
diff --git a/examples/window.cpp b/examples/window.cpp
@@ -22,13 +22,11 @@
#include <kfr/io/python_plot.hpp>
using namespace kfr;
-using namespace kfr::native;
int main(int argc, char** argv)
{
println(library_version());
- using namespace native;
const std::string options = "freqresp=True, dots=True, padwidth=1024, "
"log_freq=False, horizontal=False, normalized_freq=True";
diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp
@@ -26,114 +26,68 @@
#include "operators.hpp"
#include "select.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
namespace internal
{
-
-template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu>
-struct in_abs : in_abs<older(cpu), cc>
+// floating point
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> abs(vec<T, N> x)
{
- struct fn_abs : in_abs<older(cpu), cc>::fn_abs, fn_disabled
- {
- };
-};
-
-template <cpu_t cc>
-struct in_abs<cpu_t::common, cc> : in_select<cc>
-{
- constexpr static cpu_t cpu = cpu_t::common;
-
-private:
- using in_select<cc>::select;
-
-public:
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> abs(vec<T, N> value)
- {
- return select(value >= T(), value, -value);
- }
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> abs(vec<T, N> value)
- {
- return value & invhighbitmask<T>;
- }
-
- KFR_HANDLE_SCALAR(abs)
- KFR_SPEC_FN(in_abs, abs)
-};
+ return x & internal::invhighbitmask<T>;
+}
-#ifdef CID_ARCH_X86
+#if defined CID_ARCH_SSSE3
-template <cpu_t cc>
-struct in_abs<cpu_t::ssse3, cc> : in_select<cc>
+// 64-bit integer abs for the SSSE3 branch. No intrinsic overload is provided for i64 below,
+// so the result is chosen per element between x and -x. The template takes only N: an extra
+// type parameter that appears in no function argument cannot be deduced and would keep this
+// overload from ever being selected.
+template <size_t N>
+KFR_SINTRIN vec<i64, N> abs(vec<i64, N> x)
{
- constexpr static cpu_t cpu = cpu_t::ssse3;
-
-private:
- using in_select<cc>::select;
-
-public:
- template <size_t N>
- KFR_SINTRIN vec<i64, N> abs(vec<i64, N> value)
- {
- return select(value >= 0, value, -value);
- }
-
- KFR_CPU_INTRIN(ssse3) i32sse abs(i32sse value) { return _mm_abs_epi32(*value); }
- KFR_CPU_INTRIN(ssse3) i16sse abs(i16sse value) { return _mm_abs_epi16(*value); }
- KFR_CPU_INTRIN(ssse3) i8sse abs(i8sse value) { return _mm_abs_epi8(*value); }
-
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> abs(vec<T, N> value)
- {
- return value & invhighbitmask<T>;
- }
-
- KFR_HANDLE_ALL(abs)
- KFR_HANDLE_SCALAR(abs)
- KFR_SPEC_FN(in_abs, abs)
-};
+ return select(x >= i64(), x, -x);
+}
+// SSSE3 overloads for the i32sse / i16sse / i8sse vector types: each passes the dereferenced
+// value to the _mm_abs_epi* intrinsic of the matching element width.
+KFR_SINTRIN i32sse abs(i32sse value) { return _mm_abs_epi32(*value); }
+KFR_SINTRIN i16sse abs(i16sse value) { return _mm_abs_epi16(*value); }
+KFR_SINTRIN i8sse abs(i8sse value) { return _mm_abs_epi8(*value); }
+
+#if defined CID_ARCH_AVX2
+// AVX2 overloads for the i32avx / i16avx / i8avx vector types, compiled only when
+// CID_ARCH_AVX2 is defined; each forwards to the matching _mm256_abs_epi* intrinsic.
+KFR_SINTRIN i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); }
+KFR_SINTRIN i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); }
+KFR_SINTRIN i8avx abs(i8avx value) { return _mm256_abs_epi8(*value); }
+#endif
-template <cpu_t cc>
-struct in_abs<cpu_t::avx2, cc> : in_abs<cpu_t::ssse3, cc>
+// Non-floating-point vectors with fewer elements than vector_width<T, cpu_t::native>:
+// the input goes through expand_simd (presumably widening it to a native-size vector),
+// abs is applied to that, and slice<0, N> keeps the first N elements of the result.
+template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && !is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> abs(vec<T, N> a)
{
- constexpr static cpu_t cpu = cpu_t::avx2;
- using in_abs<cpu_t::ssse3, cc>::abs;
+ return slice<0, N>(abs(expand_simd(a)));
+}
+// Non-floating-point vectors with at least vector_width<T, cpu_t::native> elements:
+// abs is applied separately to low(a) and high(a) and the two results are concatenated.
+template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && !is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> abs(vec<T, N> a)
+{
+ return concat(abs(low(a)), abs(high(a)));
+}
- KFR_CPU_INTRIN(avx2) i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); }
- KFR_CPU_INTRIN(avx2) i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); }
- KFR_CPU_INTRIN(avx2) i8avx abs(i8avx value) { return _mm256_abs_epi8(*value); }
+#else
- KFR_HANDLE_ALL(abs)
- KFR_HANDLE_SCALAR(abs)
- KFR_SPEC_FN(in_abs, abs)
-};
+// Fallback used when CID_ARCH_SSSE3 is not defined: for non-floating-point element types,
+// pick x where it is >= T() and its negation elsewhere.
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> abs(vec<T, N> x)
+{
+ // The parameter is named x; the previous body referred to an undeclared `value`.
+ return select(x >= T(), x, -x);
+}
#endif
+KFR_HANDLE_SCALAR_1(abs)
+KFR_FN(abs)
}
-namespace native
-{
-using fn_abs = internal::in_abs<>::fn_abs;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> abs(const T1& x)
+KFR_INTRIN T1 abs(const T1& x)
{
- return internal::in_abs<>::abs(x);
+ return internal::abs(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-
-KFR_INTRIN expr_func<fn_abs, E1> abs(E1&& x)
+KFR_INTRIN expr_func<internal::fn_abs, E1> abs(E1&& x)
{
- return { fn_abs(), std::forward<E1>(x) };
-}
+ return { {}, std::forward<E1>(x) };
}
}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/base/asin_acos.hpp b/include/kfr/base/asin_acos.hpp
@@ -22,79 +22,55 @@
*/
#pragma once
-#include "abs.hpp"
#include "atan.hpp"
-#include "constants.hpp"
#include "function.hpp"
-#include "min_max.hpp"
-#include "operators.hpp"
#include "select.hpp"
-#include "shuffle.hpp"
#include "sqrt.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
namespace internal
{
-template <cpu_t cpu = cpu_t::native>
-struct in_asin_acos : private in_select<cpu>, private in_atan<cpu>, private in_sqrt<cpu>
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> asin(vec<T, N> x)
{
-private:
- using in_atan<cpu>::atan2;
- using in_sqrt<cpu>::sqrt;
-
-public:
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> asin(vec<T, N> x)
- {
- return atan2(x, sqrt(T(1) - x * x));
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> acos(vec<T, N> x)
- {
- return atan2(sqrt(T(1) - x * x), x);
- }
- KFR_SPEC_FN(in_asin_acos, asin)
- KFR_SPEC_FN(in_asin_acos, acos)
-};
+ return atan2(x, sqrt(T(1) - x * x));
}
-namespace native
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> acos(vec<T, N> x)
{
-using fn_asin = internal::in_asin_acos<>::fn_asin;
+ return atan2(sqrt(T(1) - x * x), x);
+}
+KFR_HANDLE_SCALAR(asin)
+KFR_HANDLE_SCALAR(acos)
+KFR_FN(asin)
+KFR_FN(acos)
+}
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> asin(const T1& x)
+KFR_INTRIN T1 asin(const T1& x)
{
- return internal::in_asin_acos<>::asin(x);
+ return internal::asin(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_asin, E1> asin(E1&& x)
+KFR_INTRIN expr_func<internal::fn_asin, E1> asin(E1&& x)
{
- return { fn_asin(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_acos = internal::in_asin_acos<>::fn_acos;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> acos(const T1& x)
+KFR_INTRIN T1 acos(const T1& x)
{
- return internal::in_asin_acos<>::acos(x);
+ return internal::acos(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_acos, E1> acos(E1&& x)
+KFR_INTRIN expr_func<internal::fn_acos, E1> acos(E1&& x)
{
- return { fn_acos(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
}
-}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/base/atan.hpp b/include/kfr/base/atan.hpp
@@ -28,240 +28,231 @@
#include "select.hpp"
#include "sin_cos.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
namespace internal
{
-template <cpu_t c = cpu_t::native, cpu_t cc = c>
-struct in_atan : in_trig<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
+template <size_t N>
+KFR_SINTRIN vec<f32, N> atan2k(vec<f32, N> y, vec<f32, N> x)
{
-private:
- using in_abs<cc>::abs;
- using in_round<cc>::floor;
- using in_select<cc>::select;
- using in_trig<cc>::mask_horner;
- using in_select<cc>::sign;
+ vec<f32, N> s, t, u;
+ vec<i32, N> q;
+ q = select(x < 0, -2, 0);
+ x = select(x < 0, -x, x);
+ mask<i32, N> m;
+ m = y > x;
+ t = x;
+ x = select(m, y, x);
+ y = select(m, -t, y);
+ q = select(m, q + 1, q);
+ s = y / x;
+ t = s * s;
+ u = 0.00282363896258175373077393f;
+ u = fmadd(u, t, -0.0159569028764963150024414f);
+ u = fmadd(u, t, 0.0425049886107444763183594f);
+ u = fmadd(u, t, -0.0748900920152664184570312f);
+ u = fmadd(u, t, 0.106347933411598205566406f);
+ u = fmadd(u, t, -0.142027363181114196777344f);
+ u = fmadd(u, t, 0.199926957488059997558594f);
+ u = fmadd(u, t, -0.333331018686294555664062f);
+ t = u * t * s + s;
+ t = cast<f32>(q) * 1.5707963267948966192313216916398f + t;
+ return t;
+}
-public:
- template <size_t N>
- KFR_SINTRIN vec<f32, N> atan2k(vec<f32, N> y, vec<f32, N> x)
- {
- vec<f32, N> s, t, u;
- vec<i32, N> q;
- q = select(x < 0, -2, 0);
- x = select(x < 0, -x, x);
- mask<i32, N> m;
- m = y > x;
- t = x;
- x = select(m, y, x);
- y = select(m, -t, y);
- q = select(m, q + 1, q);
- s = y / x;
- t = s * s;
- u = 0.00282363896258175373077393f;
- u = fmadd(u, t, -0.0159569028764963150024414f);
- u = fmadd(u, t, 0.0425049886107444763183594f);
- u = fmadd(u, t, -0.0748900920152664184570312f);
- u = fmadd(u, t, 0.106347933411598205566406f);
- u = fmadd(u, t, -0.142027363181114196777344f);
- u = fmadd(u, t, 0.199926957488059997558594f);
- u = fmadd(u, t, -0.333331018686294555664062f);
- t = u * t * s + s;
- t = cast<f32>(q) * 1.5707963267948966192313216916398f + t;
- return t;
- }
- template <size_t N>
- KFR_SINTRIN vec<f64, N> atan2k(vec<f64, N> y, vec<f64, N> x)
- {
- vec<f64, N> s, t, u;
- vec<i64, N> q;
- q = select(x < 0, -2ll, 0ll);
- x = select(x < 0, -x, x);
- vec<i64, N> m;
- m = y > x;
- t = x;
- x = select(m, y, x);
- y = select(m, -t, y);
- q = select(m, q + 1ll, q);
- s = y / x;
- t = s * s;
- u = -1.88796008463073496563746e-05;
- u = fmadd(u, t, 0.000209850076645816976906797);
- u = fmadd(u, t, -0.00110611831486672482563471);
- u = fmadd(u, t, 0.00370026744188713119232403);
- u = fmadd(u, t, -0.00889896195887655491740809);
- u = fmadd(u, t, 0.016599329773529201970117);
- u = fmadd(u, t, -0.0254517624932312641616861);
- u = fmadd(u, t, 0.0337852580001353069993897);
- u = fmadd(u, t, -0.0407629191276836500001934);
- u = fmadd(u, t, 0.0466667150077840625632675);
- u = fmadd(u, t, -0.0523674852303482457616113);
- u = fmadd(u, t, 0.0587666392926673580854313);
- u = fmadd(u, t, -0.0666573579361080525984562);
- u = fmadd(u, t, 0.0769219538311769618355029);
- u = fmadd(u, t, -0.090908995008245008229153);
- u = fmadd(u, t, 0.111111105648261418443745);
- u = fmadd(u, t, -0.14285714266771329383765);
- u = fmadd(u, t, 0.199999999996591265594148);
- u = fmadd(u, t, -0.333333333333311110369124);
- t = u * t * s + s;
- t = cast<f64>(q) * 1.5707963267948966192313216916398 + t;
- return t;
- }
- template <size_t N>
- KFR_SINTRIN vec<f32, N> atan2(vec<f32, N> y, vec<f32, N> x)
- {
- vec<f32, N> r = atan2k(abs(y), x);
- constexpr f32 pi = 3.1415926535897932384626433832795f;
- constexpr f32 pi_over_2 = 1.5707963267948966192313216916398f;
- constexpr f32 pi_over_4 = 0.78539816339744830961566084581988f;
- r = mulsign(r, x);
- r = select(isinf(x) || x == 0.0f, pi_over_2 - select(x.asmask(), mulsign(pi_over_2, x), 0.0f), r);
- r = select(isinf(y), pi_over_2 - select(x.asmask(), mulsign(pi_over_4, x), 0.0f), r);
- r = select(y == 0.0f, fbitcast(ibitcast(sign(x) == -1.0f) & ibitcast(pi)), r);
- r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y)));
- return r;
- }
- template <size_t N>
- KFR_SINTRIN vec<f64, N> atan2(vec<f64, N> y, vec<f64, N> x)
- {
- vec<f64, N> r = atan2k(abs(y), x);
- constexpr f64 pi = 3.1415926535897932384626433832795;
- constexpr f64 pi_over_2 = 1.5707963267948966192313216916398;
- constexpr f64 pi_over_4 = 0.78539816339744830961566084581988;
- r = mulsign(r, x);
- r = select(isinf(x) || x == 0.0, pi_over_2 - select(x, mulsign(pi_over_2, x), 0.0), r);
- r = select(isinf(y), pi_over_2 - select(x, mulsign(pi_over_4, x), 0.0), r);
- r = select(y == 0.0, fbitcast(ibitcast(sign(x) == -1.0) & ibitcast(pi)), r);
- r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y)));
- return r;
- }
- template <size_t N>
- KFR_SINTRIN vec<f32, N> atan(vec<f32, N> s)
- {
- vec<f32, N> t, u;
- vec<i32, N> q;
- q = select(s < 0.f, 2, 0);
- s = select(s < 0.f, -s, s);
- q = select(s > 1.f, q | 1, q);
- s = select(s > 1.f, 1.0f / s, s);
- t = s * s;
- u = 0.00282363896258175373077393f;
- u = fmadd(u, t, -0.0159569028764963150024414f);
- u = fmadd(u, t, 0.0425049886107444763183594f);
- u = fmadd(u, t, -0.0748900920152664184570312f);
- u = fmadd(u, t, 0.106347933411598205566406f);
- u = fmadd(u, t, -0.142027363181114196777344f);
- u = fmadd(u, t, 0.199926957488059997558594f);
- u = fmadd(u, t, -0.333331018686294555664062f);
- t = s + s * (t * u);
- t = select((q & 1) != 0, 1.570796326794896557998982f - t, t);
- t = select((q & 2) != 0, -t, t);
- return t;
- }
- template <size_t N>
- KFR_SINTRIN vec<f64, N> atan(vec<f64, N> s)
- {
- vec<f64, N> t, u;
- vec<i64, N> q;
- q = select(s < 0.0, 2ll, 0ll);
- s = select(s < 0.0, -s, s);
- q = select(s > 1.0, q | 1, q);
- s = select(s > 1.0, 1.0 / s, s);
- t = s * s;
- u = -1.88796008463073496563746e-05;
- u = fmadd(u, t, 0.000209850076645816976906797);
- u = fmadd(u, t, -0.00110611831486672482563471);
- u = fmadd(u, t, 0.00370026744188713119232403);
- u = fmadd(u, t, -0.00889896195887655491740809);
- u = fmadd(u, t, 0.016599329773529201970117);
- u = fmadd(u, t, -0.0254517624932312641616861);
- u = fmadd(u, t, 0.0337852580001353069993897);
- u = fmadd(u, t, -0.0407629191276836500001934);
- u = fmadd(u, t, 0.0466667150077840625632675);
- u = fmadd(u, t, -0.0523674852303482457616113);
- u = fmadd(u, t, 0.0587666392926673580854313);
- u = fmadd(u, t, -0.0666573579361080525984562);
- u = fmadd(u, t, 0.0769219538311769618355029);
- u = fmadd(u, t, -0.090908995008245008229153);
- u = fmadd(u, t, 0.111111105648261418443745);
- u = fmadd(u, t, -0.14285714266771329383765);
- u = fmadd(u, t, 0.199999999996591265594148);
- u = fmadd(u, t, -0.333333333333311110369124);
- t = s + s * (t * u);
- t = select((q & 1) != 0, 1.570796326794896557998982 - t, t);
- t = select((q & 2) != 0, -t, t);
- return t;
- }
- template <typename T>
- KFR_SINTRIN T atandeg(const T& x)
- {
- return atan(x) * c_radtodeg<T>;
- }
- template <typename T1, typename T2>
- KFR_SINTRIN common_type<T1, T2> atan2deg(const T1& y, const T2& x)
- {
- return atan2(y, x) * c_radtodeg<common_type<T1, T2>>;
- }
- KFR_HANDLE_SCALAR(atan)
- KFR_HANDLE_SCALAR(atan2)
- KFR_SPEC_FN(in_atan, atan)
- KFR_SPEC_FN(in_atan, atandeg)
- KFR_SPEC_FN(in_atan, atan2)
- KFR_SPEC_FN(in_atan, atan2deg)
-};
+template <size_t N>
+KFR_SINTRIN vec<f64, N> atan2k(vec<f64, N> y, vec<f64, N> x)
+{
+ vec<f64, N> s, t, u;
+ vec<i64, N> q;
+ q = select(x < 0, -2ll, 0ll);
+ x = select(x < 0, -x, x);
+ vec<i64, N> m;
+ m = y > x;
+ t = x;
+ x = select(m, y, x);
+ y = select(m, -t, y);
+ q = select(m, q + 1ll, q);
+ s = y / x;
+ t = s * s;
+ u = -1.88796008463073496563746e-05;
+ u = fmadd(u, t, 0.000209850076645816976906797);
+ u = fmadd(u, t, -0.00110611831486672482563471);
+ u = fmadd(u, t, 0.00370026744188713119232403);
+ u = fmadd(u, t, -0.00889896195887655491740809);
+ u = fmadd(u, t, 0.016599329773529201970117);
+ u = fmadd(u, t, -0.0254517624932312641616861);
+ u = fmadd(u, t, 0.0337852580001353069993897);
+ u = fmadd(u, t, -0.0407629191276836500001934);
+ u = fmadd(u, t, 0.0466667150077840625632675);
+ u = fmadd(u, t, -0.0523674852303482457616113);
+ u = fmadd(u, t, 0.0587666392926673580854313);
+ u = fmadd(u, t, -0.0666573579361080525984562);
+ u = fmadd(u, t, 0.0769219538311769618355029);
+ u = fmadd(u, t, -0.090908995008245008229153);
+ u = fmadd(u, t, 0.111111105648261418443745);
+ u = fmadd(u, t, -0.14285714266771329383765);
+ u = fmadd(u, t, 0.199999999996591265594148);
+ u = fmadd(u, t, -0.333333333333311110369124);
+ t = u * t * s + s;
+ t = cast<f64>(q) * 1.5707963267948966192313216916398 + t;
+ return t;
}
-namespace native
+
+// atan2(y, x) for f32 vectors. The core value comes from atan2k evaluated on abs(y); the
+// remaining statements patch up zero, infinite and NaN inputs and restore the sign of y.
+template <size_t N>
+KFR_SINTRIN vec<f32, N> atan2(vec<f32, N> y, vec<f32, N> x)
{
-using fn_atan = internal::in_atan<>::fn_atan;
-template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> atan(const T1& y, const T2& x)
+ vec<f32, N> r = atan2k(abs(y), x);
+ constexpr f32 pi = 3.1415926535897932384626433832795f;
+ constexpr f32 pi_over_2 = 1.5707963267948966192313216916398f;
+ constexpr f32 pi_over_4 = 0.78539816339744830961566084581988f;
+ r = mulsign(r, x);
+ // x infinite or zero: replace r with pi/2 minus a sign-of-x adjusted pi/2 term
+ r = select(isinf(x) || x == 0.0f, pi_over_2 - select(x.asmask(), mulsign(pi_over_2, x), 0.0f), r);
+ // y infinite: same shape of correction, using pi/4
+ r = select(isinf(y), pi_over_2 - select(x.asmask(), mulsign(pi_over_4, x), 0.0f), r);
+ // y == 0: result is pi where x < 0 and 0 otherwise (the comparison mask is ANDed with pi's bits)
+ r = select(y == 0.0f, fbitcast(ibitcast(x < 0) & ibitcast(pi)), r);
+ // apply the sign of y, then OR in the isnan mask bits where either input is NaN
+ r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y)));
+ return r;
+}
+
+// atan2(y, x) for f64 vectors; mirrors the f32 overload statement for statement.
+// NOTE(review): the inner selects below take `x` directly, whereas the f32 overload passes
+// `x.asmask()` - confirm that the difference is intended.
+template <size_t N>
+KFR_SINTRIN vec<f64, N> atan2(vec<f64, N> y, vec<f64, N> x)
{
- return internal::in_atan<>::atan(y, x);
+ vec<f64, N> r = atan2k(abs(y), x);
+ constexpr f64 pi = 3.1415926535897932384626433832795;
+ constexpr f64 pi_over_2 = 1.5707963267948966192313216916398;
+ constexpr f64 pi_over_4 = 0.78539816339744830961566084581988;
+ r = mulsign(r, x);
+ // x infinite or zero: replace r with pi/2 minus a sign-of-x adjusted pi/2 term
+ r = select(isinf(x) || x == 0.0, pi_over_2 - select(x, mulsign(pi_over_2, x), 0.0), r);
+ // y infinite: same shape of correction, using pi/4
+ r = select(isinf(y), pi_over_2 - select(x, mulsign(pi_over_4, x), 0.0), r);
+ // y == 0: result is pi where x < 0 and 0 otherwise (the comparison mask is ANDed with pi's bits)
+ r = select(y == 0.0, fbitcast(ibitcast(x < 0) & ibitcast(pi)), r);
+ // apply the sign of y, then OR in the isnan mask bits where either input is NaN
+ r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y)));
+ return r;
}
-template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_atan, E1, E2> atan(E1&& y, E2&& x)
+
+template <size_t N>
+KFR_SINTRIN vec<f32, N> atan(vec<f32, N> s)
{
- return { fn_atan(), std::forward<E1>(y), std::forward<E2>(x) };
+ vec<f32, N> t, u;
+ vec<i32, N> q;
+ q = select(s < 0.f, 2, 0);
+ s = select(s < 0.f, -s, s);
+ q = select(s > 1.f, q | 1, q);
+ s = select(s > 1.f, 1.0f / s, s);
+ t = s * s;
+ u = 0.00282363896258175373077393f;
+ u = fmadd(u, t, -0.0159569028764963150024414f);
+ u = fmadd(u, t, 0.0425049886107444763183594f);
+ u = fmadd(u, t, -0.0748900920152664184570312f);
+ u = fmadd(u, t, 0.106347933411598205566406f);
+ u = fmadd(u, t, -0.142027363181114196777344f);
+ u = fmadd(u, t, 0.199926957488059997558594f);
+ u = fmadd(u, t, -0.333331018686294555664062f);
+ t = s + s * (t * u);
+ t = select((q & 1) != 0, 1.570796326794896557998982f - t, t);
+ t = select((q & 2) != 0, -t, t);
+ return t;
}
-using fn_atan2 = internal::in_atan<>::fn_atan2;
-template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> atan2(const T1& y, const T2& x)
+
+template <size_t N>
+KFR_SINTRIN vec<f64, N> atan(vec<f64, N> s)
{
- return internal::in_atan<>::atan2(y, x);
+ vec<f64, N> t, u;
+ vec<i64, N> q;
+ q = select(s < 0.0, 2ll, 0ll);
+ s = select(s < 0.0, -s, s);
+ q = select(s > 1.0, q | 1, q);
+ s = select(s > 1.0, 1.0 / s, s);
+ t = s * s;
+ u = -1.88796008463073496563746e-05;
+ u = fmadd(u, t, 0.000209850076645816976906797);
+ u = fmadd(u, t, -0.00110611831486672482563471);
+ u = fmadd(u, t, 0.00370026744188713119232403);
+ u = fmadd(u, t, -0.00889896195887655491740809);
+ u = fmadd(u, t, 0.016599329773529201970117);
+ u = fmadd(u, t, -0.0254517624932312641616861);
+ u = fmadd(u, t, 0.0337852580001353069993897);
+ u = fmadd(u, t, -0.0407629191276836500001934);
+ u = fmadd(u, t, 0.0466667150077840625632675);
+ u = fmadd(u, t, -0.0523674852303482457616113);
+ u = fmadd(u, t, 0.0587666392926673580854313);
+ u = fmadd(u, t, -0.0666573579361080525984562);
+ u = fmadd(u, t, 0.0769219538311769618355029);
+ u = fmadd(u, t, -0.090908995008245008229153);
+ u = fmadd(u, t, 0.111111105648261418443745);
+ u = fmadd(u, t, -0.14285714266771329383765);
+ u = fmadd(u, t, 0.199999999996591265594148);
+ u = fmadd(u, t, -0.333333333333311110369124);
+ t = s + s * (t * u);
+ t = select((q & 1) != 0, 1.570796326794896557998982 - t, t);
+ t = select((q & 2) != 0, -t, t);
+ return t;
}
-template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_atan2, E1, E2> atan2(E1&& y, E2&& x)
+
+template <typename T>
+KFR_SINTRIN T atandeg(const T& x)
{
- return { fn_atan2(), std::forward<E1>(y), std::forward<E2>(x) };
+ return atan(x) * c_radtodeg<T>;
+}
+
+template <typename T1, typename T2>
+KFR_SINTRIN common_type<T1, T2> atan2deg(const T1& y, const T2& x)
+{
+ return atan2(y, x) * c_radtodeg<common_type<T1, T2>>;
+}
+
+KFR_HANDLE_SCALAR(atan)
+KFR_HANDLE_SCALAR(atan2)
+KFR_FN(atan)
+KFR_FN(atandeg)
+KFR_FN(atan2)
+KFR_FN(atan2deg)
}
-using fn_atandeg = internal::in_atan<>::fn_atandeg;
+
+// Public atan for a single numeric argument (enabled when is_numeric<T1> holds).
+// Forwards to internal::atan and returns the result as ftype<T1>.
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> atan(const T1& x)
+{
+ return internal::atan(x);
+}
+
+// Public atan for input expressions: returns an expr_func over internal::fn_atan holding the
+// forwarded argument. The leading `{}` presumably value-initializes the functor member.
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<internal::fn_atan, E1> atan(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+
+// Public atandeg for a single numeric argument. Forwards to internal::atandeg, which
+// multiplies atan(x) by c_radtodeg; the result is returned as ftype<T1>.
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> atandeg(const T1& x)
+{
+ return internal::atandeg(x);
+}
+
+// Public atandeg for input expressions: returns an expr_func over internal::fn_atandeg
+// holding the forwarded argument.
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<internal::fn_atandeg, E1> atandeg(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+
+// Public two-argument arctangent for numeric arguments. The first argument is y and the
+// second is x, in the same order as internal::atan2(y, x); the parameter names now say so
+// (argument positions, and therefore results, are unchanged).
template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> atandeg(const T1& y, const T2& x)
+KFR_INTRIN common_type<T1, T2> atan2(const T1& y, const T2& x)
{
- return internal::in_atan<>::atandeg(y, x);
+ return internal::atan2(y, x);
}
+
+// Public two-argument arctangent for input expressions. Returns an expr_func over
+// internal::fn_atan2 holding the forwarded arguments in (y, x) order, matching the
+// internal atan2(y, x) signature (argument positions are unchanged).
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_atandeg, E1, E2> atandeg(E1&& y, E2&& x)
+KFR_INTRIN expr_func<internal::fn_atan2, E1, E2> atan2(E1&& y, E2&& x)
{
- return { fn_atandeg(), std::forward<E1>(y), std::forward<E2>(x) };
+ return { {}, std::forward<E1>(y), std::forward<E2>(x) };
}
-using fn_atan2deg = internal::in_atan<>::fn_atan2deg;
+
+// Public two-argument arctangent in degrees for numeric arguments. The first argument is y
+// and the second is x, in the same order as internal::atan2deg(y, x), which scales
+// atan2(y, x) by c_radtodeg (argument positions are unchanged).
template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> atan2deg(const T1& y, const T2& x)
+KFR_INTRIN common_type<T1, T2> atan2deg(const T1& y, const T2& x)
{
- return internal::in_atan<>::atan2deg(y, x);
+ return internal::atan2deg(y, x);
}
+
+// Public two-argument arctangent in degrees for input expressions. Returns an expr_func over
+// internal::fn_atan2deg holding the forwarded arguments in (y, x) order, matching the
+// internal atan2deg(y, x) signature (argument positions are unchanged).
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_atan2deg, E1, E2> atan2deg(E1&& y, E2&& x)
+KFR_INTRIN expr_func<internal::fn_atan2deg, E1, E2> atan2deg(E1&& y, E2&& x)
{
- return { fn_atan2deg(), std::forward<E1>(y), std::forward<E2>(x) };
-}
+ return { {}, std::forward<E1>(y), std::forward<E2>(x) };
}
}
-#pragma clang diagnostic pop
diff --git a/include/kfr/base/clamp.hpp b/include/kfr/base/clamp.hpp
@@ -0,0 +1,73 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "min_max.hpp"
+
+namespace kfr
+{
+
+namespace internal
+{
+
+// Clamp x between lo and hi: min(x, hi) is taken first, then max with lo is applied to that
+// result (so lo is applied last).
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> lo, vec<T, N> hi)
+{
+ return max(min(x, hi), lo);
+}
+
+// Two-argument clamp: min(x, hi) followed by max with zerovector<T, N>() as the lower bound.
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> hi)
+{
+ return max(min(x, hi), zerovector<T, N>());
+}
+
+KFR_I_FN(clamp)
+}
+
+// Public three-argument clamp for numeric arguments. All three arguments are converted to
+// their common type Tout before being handed to internal::clamp; Tout is also the return type.
+template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value),
+ typename Tout = common_type<T1, T2, T3>>
+KFR_INTRIN Tout clamp(const T1& x, const T2& lo, const T3& hi)
+{
+ return internal::clamp(static_cast<Tout>(x), static_cast<Tout>(lo), static_cast<Tout>(hi));
+}
+
+// Public three-argument clamp for input expressions: returns an expr_func over
+// internal::fn_clamp holding the forwarded x, lo and hi in that order.
+template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
+KFR_INTRIN expr_func<internal::fn_clamp, E1, E2, E3> clamp(E1&& x, E2&& lo, E3&& hi)
+{
+ return { {}, std::forward<E1>(x), std::forward<E2>(lo), std::forward<E3>(hi) };
+}
+
+// Public two-argument clamp for numeric arguments (upper bound only). Both arguments are
+// converted to their common type Tout before being handed to the two-argument internal::clamp.
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value),
+ typename Tout = common_type<T1, T2>>
+KFR_INTRIN Tout clamp(const T1& x, const T2& hi)
+{
+ return internal::clamp(static_cast<Tout>(x), static_cast<Tout>(hi));
+}
+
+// Public two-argument clamp for input expressions: returns an expr_func over
+// internal::fn_clamp holding the forwarded x and hi.
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INTRIN expr_func<internal::fn_clamp, E1, E2> clamp(E1&& x, E2&& hi)
+{
+ return { {}, std::forward<E1>(x), std::forward<E2>(hi) };
+}
+}
diff --git a/include/kfr/base/complex.hpp b/include/kfr/base/complex.hpp
@@ -79,9 +79,31 @@ struct complex
T re;
T im;
};
-
#endif
#endif
+}
+namespace cometa
+{
+// compound_type_traits specialization for kfr::complex<T>: describes a complex value as a
+// non-scalar compound of two components of type T.
+template <typename T>
+struct compound_type_traits<kfr::complex<T>>
+{
+ constexpr static size_t width = 2; // two components: real and imaginary
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static bool is_scalar = false;
+ // rebind swaps the component type directly; deep_rebind first applies
+ // cometa::deep_rebind to the component type.
+ template <typename U>
+ using rebind = kfr::complex<U>;
+ template <typename U>
+ using deep_rebind = kfr::complex<cometa::deep_rebind<subtype, U>>;
+
+ // Component access: index 0 yields real(), any other index yields imag().
+ static constexpr subtype at(const kfr::complex<T>& value, size_t index)
+ {
+ return index == 0 ? value.real() : value.imag();
+ }
+};
+}
+namespace kfr
+{
using c32 = complex<f32>;
using c64 = complex<f64>;
@@ -262,363 +284,280 @@ constexpr KFR_INLINE complex<T> make_complex(T1 real, T2 imag = T2(0))
namespace internal
{
-template <cpu_t c = cpu_t::native>
-struct in_complex : in_select<c>, in_sin_cos<c>, in_hyperbolic<c>, in_sqrt<c>, in_atan<c>, in_log_exp<c>
-{
- constexpr static cpu_t cur = c;
- using in_sqrt<c>::sqrt;
- using in_sin_cos<c>::sincos;
- using in_sin_cos<c>::cossin;
- using in_hyperbolic<c>::sinhcosh;
- using in_hyperbolic<c>::coshsinh;
- using in_atan<c>::atan2;
- using in_log_exp<c>::log;
- using in_log_exp<c>::log2;
- using in_log_exp<c>::log10;
- using in_log_exp<c>::exp;
- using in_log_exp<c>::exp2;
- using in_log_exp<c>::exp10;
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> csin(const vec<complex<T>, N>& x)
- {
- return ccomp(sincos(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x))));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> csinh(const vec<complex<T>, N>& x)
- {
- return ccomp(sinhcosh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x))));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> ccos(const vec<complex<T>, N>& x)
- {
- return ccomp(negodd(cossin(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x)))));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> ccosh(const vec<complex<T>, N>& x)
- {
- return ccomp(coshsinh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x))));
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> cabs(const vec<complex<T>, N>& x)
- {
- const vec<T, N* 2> xx = sqr(cdecom(x));
- return sqrt(even(xx) + odd(xx));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> carg(const vec<complex<T>, N>& x)
- {
- const vec<T, N* 2> xx = cdecom(x);
- return atan2(even(xx), odd(xx));
- }
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> csin(const vec<complex<T>, N>& x) // complex sine for a vector of complex values
+{
+ return ccomp(sincos(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x)))); // sincos of the duplicated real parts times coshsinh of the duplicated imaginary parts; presumably (sin a*cosh b, cos a*sinh b) for each a+bi
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> csinh(const vec<complex<T>, N>& x) // complex hyperbolic sine for a vector of complex values
+{
+ return ccomp(sinhcosh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); // sinhcosh of the duplicated real parts times cossin of the duplicated imaginary parts; presumably (sinh a*cos b, cosh a*sin b) for each a+bi
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> ccos(const vec<complex<T>, N>& x) // complex cosine for a vector of complex values
+{
+ return ccomp(negodd(cossin(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x))))); // cossin(real) * coshsinh(imag), then negodd; presumably (cos a*cosh b, -sin a*sinh b) for each a+bi
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> ccosh(const vec<complex<T>, N>& x) // complex hyperbolic cosine for a vector of complex values
+{
+ return ccomp(coshsinh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); // coshsinh of the duplicated real parts times cossin of the duplicated imaginary parts; presumably (cosh a*cos b, sinh a*sin b) for each a+bi
+}
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> clog(const vec<complex<T>, N>& x)
- {
- return make_complex(log(cabs(x)), carg(x));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> clog2(const vec<complex<T>, N>& x)
- {
- return clog(x) * c_recip_log_2<T>;
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> clog10(const vec<complex<T>, N>& x)
- {
- return clog(x) * c_recip_log_10<T>;
- }
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> cabs(const vec<complex<T>, N>& x) // magnitude of each complex element, returned as a real vector
+{
+ const vec<T, N* 2> xx = sqr(cdecom(x)); // squares of the 2N decomposed scalar components
+ return sqrt(even(xx) + odd(xx)); // add even- and odd-indexed squares (presumably re^2 + im^2), then take the square root
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> carg(const vec<complex<T>, N>& x) // argument (phase angle) of each complex element, returned as a real vector
+{
+ const vec<T, N* 2> xx = cdecom(x); // 2N decomposed scalar components (presumably interleaved re, im)
+ return atan2(even(xx), odd(xx)); // NOTE(review): even lanes (presumably real parts) are passed first and odd lanes (imaginary parts) second; a std-style atan2(y, x) would expect the imaginary part first - confirm against the atan2 overload in scope
+}
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> cexp(const vec<complex<T>, N>& x)
- {
- return ccomp(exp(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x))));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> cexp2(const vec<complex<T>, N>& x)
- {
- return cexp(x * c_log_2<T>);
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> cexp10(const vec<complex<T>, N>& x)
- {
- return cexp(x * c_log_10<T>);
- }
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> clog(const vec<complex<T>, N>& x) // complex natural logarithm for a vector of complex values
+{
+ return make_complex(log(cabs(x)), carg(x)); // real part = log of the magnitude, imaginary part = carg(x)
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> clog2(const vec<complex<T>, N>& x) // complex base-2 logarithm, derived from clog
+{
+ return clog(x) * c_recip_log_2<T>; // scale the natural log by the constant c_recip_log_2 (presumably 1/ln 2)
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> clog10(const vec<complex<T>, N>& x) // complex base-10 logarithm, derived from clog
+{
+ return clog(x) * c_recip_log_10<T>; // scale the natural log by the constant c_recip_log_10 (presumably 1/ln 10)
+}
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> polar(const vec<complex<T>, N>& x)
- {
- return make_complex(cabs(x), carg(x));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> cartesian(const vec<complex<T>, N>& x)
- {
- return cdupreal(x) * ccomp(cossin(cdecom(cdupimag(x))));
- }
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> cexp(const vec<complex<T>, N>& x) // complex exponential for a vector of complex values
+{
+ return ccomp(exp(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); // exp of the duplicated real parts times cossin of the duplicated imaginary parts; presumably (e^a*cos b, e^a*sin b) for each a+bi
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> cexp2(const vec<complex<T>, N>& x) // complex base-2 exponential, derived from cexp
+{
+ return cexp(x * c_log_2<T>); // pre-scale the argument by the constant c_log_2 (presumably ln 2)
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> cexp10(const vec<complex<T>, N>& x) // complex base-10 exponential, derived from cexp
+{
+ return cexp(x * c_log_10<T>); // pre-scale the argument by the constant c_log_10 (presumably ln 10)
+}
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> cabsdup(vec<T, N> x)
- {
- x = sqr(x);
- return sqrt(x + swap<2>(x));
- }
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> polar(const vec<complex<T>, N>& x) // packs magnitude and argument of each element into a complex value
+{
+ return make_complex(cabs(x), carg(x)); // first component = cabs(x), second component = carg(x)
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> cartesian(const vec<complex<T>, N>& x) // presumably the inverse of polar: treats x as (magnitude, angle) pairs - confirm
+{
+ return cdupreal(x) * ccomp(cossin(cdecom(cdupimag(x)))); // duplicated first component times the complex built from cossin of the duplicated second component
+}
- template <typename T, size_t N>
- KFR_SINTRIN vec<complex<T>, N> csqrt(const vec<complex<T>, N>& x)
- {
- const vec<T, N> t = (cabsdup(cdecom(x)) + cdecom(cnegimag(cdupreal(x)))) * T(0.5);
- return ccomp(select(dupodd(x) < T(), cdecom(cnegimag(ccomp(t))), t));
- }
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> cabsdup(vec<T, N> x) // takes already-decomposed scalars and returns a vector of the same width
+{
+ x = sqr(x); // square every scalar component
+ return sqrt(x + swap<2>(x)); // add each lane to its swap<2> partner (presumably its pair neighbour) and take the root, so both lanes of a pair hold the same value
+}
- KFR_HANDLE_SCALAR(csin)
- KFR_HANDLE_SCALAR(csinh)
- KFR_HANDLE_SCALAR(ccos)
- KFR_HANDLE_SCALAR(ccosh)
- KFR_HANDLE_SCALAR(cabs)
- KFR_HANDLE_SCALAR(carg)
- KFR_HANDLE_SCALAR(clog)
- KFR_HANDLE_SCALAR(clog2)
- KFR_HANDLE_SCALAR(clog10)
- KFR_HANDLE_SCALAR(cexp)
- KFR_HANDLE_SCALAR(cexp2)
- KFR_HANDLE_SCALAR(cexp10)
- KFR_HANDLE_SCALAR(polar)
- KFR_HANDLE_SCALAR(cartesian)
- KFR_HANDLE_SCALAR(csqrt)
-
- KFR_SPEC_FN(in_complex, csin)
- KFR_SPEC_FN(in_complex, csinh)
- KFR_SPEC_FN(in_complex, ccos)
- KFR_SPEC_FN(in_complex, ccosh)
- KFR_SPEC_FN(in_complex, cabs)
- KFR_SPEC_FN(in_complex, carg)
- KFR_SPEC_FN(in_complex, clog)
- KFR_SPEC_FN(in_complex, clog2)
- KFR_SPEC_FN(in_complex, clog10)
- KFR_SPEC_FN(in_complex, cexp)
- KFR_SPEC_FN(in_complex, cexp2)
- KFR_SPEC_FN(in_complex, cexp10)
- KFR_SPEC_FN(in_complex, polar)
- KFR_SPEC_FN(in_complex, cartesian)
- KFR_SPEC_FN(in_complex, csqrt)
-};
+template <typename T, size_t N>
+KFR_SINTRIN vec<complex<T>, N> csqrt(const vec<complex<T>, N>& x) // complex square root for a vector of complex values
+{
+ const vec<T, N> t = (cabsdup(cdecom(x)) + cdecom(cnegimag(cdupreal(x)))) * T(0.5); // half of (duplicated magnitude + (re, -re)); NOTE(review): t is declared vec<T, N> although cabs stores cdecom(x) in vec<T, N*2>, and no sqrt is applied to t here - confirm
+ return ccomp(select(dupodd(x) < T(), cdecom(cnegimag(ccomp(t))), t)); // where dupodd(x) is negative, use t with its odd component negated; otherwise use t unchanged
+}
+
+KFR_HANDLE_SCALAR(csin) // one KFR_HANDLE_SCALAR per function above; presumably generates the scalar-argument overload that the public wrappers call - macro body is not visible here
+KFR_HANDLE_SCALAR(csinh)
+KFR_HANDLE_SCALAR(ccos)
+KFR_HANDLE_SCALAR(ccosh)
+KFR_HANDLE_SCALAR(cabs)
+KFR_HANDLE_SCALAR(carg)
+KFR_HANDLE_SCALAR(clog)
+KFR_HANDLE_SCALAR(clog2)
+KFR_HANDLE_SCALAR(clog10)
+KFR_HANDLE_SCALAR(cexp)
+KFR_HANDLE_SCALAR(cexp2)
+KFR_HANDLE_SCALAR(cexp10)
+KFR_HANDLE_SCALAR(polar)
+KFR_HANDLE_SCALAR(cartesian)
+KFR_HANDLE_SCALAR(csqrt)
+
+KFR_FN(csin) // one KFR_FN per function; presumably defines the fn_<name> functor types used as internal::fn_<name> by the expression overloads - confirm
+KFR_FN(csinh)
+KFR_FN(ccos)
+KFR_FN(ccosh)
+KFR_FN(cabs)
+KFR_FN(carg)
+KFR_FN(clog)
+KFR_FN(clog2)
+KFR_FN(clog10)
+KFR_FN(cexp)
+KFR_FN(cexp2)
+KFR_FN(cexp10)
+KFR_FN(polar)
+KFR_FN(cartesian)
+KFR_FN(csqrt)
}
-namespace native
-{
-using fn_csin = internal::in_complex<>::fn_csin;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> csin(const T1& x)
+KFR_INTRIN T1 csin(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::csin(x);
+ return internal::csin(x); // forwards to the free function internal::csin (formerly in_complex<>::csin)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_csin, E1> csin(E1&& x)
+KFR_INTRIN expr_func<internal::fn_csin, E1> csin(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_csin
{
return { {}, std::forward<E1>(x) };
}
-
-using fn_csinh = internal::in_complex<>::fn_csinh;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> csinh(const T1& x)
+KFR_INTRIN T1 csinh(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::csinh(x);
+ return internal::csinh(x); // forwards to the free function internal::csinh (formerly in_complex<>::csinh)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_csinh, E1> csinh(E1&& x)
+KFR_INTRIN expr_func<internal::fn_csinh, E1> csinh(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_csinh
{
return { {}, std::forward<E1>(x) };
}
-
-using fn_ccos = internal::in_complex<>::fn_ccos;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> ccos(const T1& x)
+KFR_INTRIN T1 ccos(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::ccos(x);
+ return internal::ccos(x); // forwards to the free function internal::ccos (formerly in_complex<>::ccos)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_ccos, E1> ccos(E1&& x)
+KFR_INTRIN expr_func<internal::fn_ccos, E1> ccos(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_ccos
{
- return { fn_ccos(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_ccos() temporary
}
-
-using fn_ccosh = internal::in_complex<>::fn_ccosh;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> ccosh(const T1& x)
+KFR_INTRIN T1 ccosh(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::ccosh(x);
+ return internal::ccosh(x); // forwards to the free function internal::ccosh (formerly in_complex<>::ccosh)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_ccosh, E1> ccosh(E1&& x)
+KFR_INTRIN expr_func<internal::fn_ccosh, E1> ccosh(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_ccosh
{
- return { fn_ccosh(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_ccosh() temporary
}
-
-using fn_cabs = internal::in_complex<>::fn_cabs;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN realftype<T1> cabs(const T1& x)
+KFR_INTRIN realtype<T1> cabs(const T1& x) // numeric overload; returns realtype<T1> (previously realftype<T1>)
{
- return internal::in_complex<>::cabs(x);
+ return internal::cabs(x); // forwards to the free function internal::cabs (formerly in_complex<>::cabs)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cabs, E1> cabs(E1&& x)
+KFR_INTRIN expr_func<internal::fn_cabs, E1> cabs(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_cabs
{
- return { fn_cabs(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_cabs() temporary
}
-
-using fn_carg = internal::in_complex<>::fn_carg;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN realftype<T1> carg(const T1& x)
+KFR_INTRIN realtype<T1> carg(const T1& x) // numeric overload; returns realtype<T1> (previously realftype<T1>)
{
- return internal::in_complex<>::carg(x);
+ return internal::carg(x); // forwards to the free function internal::carg (formerly in_complex<>::carg)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_carg, E1> carg(E1&& x)
+KFR_INTRIN expr_func<internal::fn_carg, E1> carg(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_carg
{
- return { fn_carg(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_carg() temporary
}
-
-using fn_clog = internal::in_complex<>::fn_clog;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> clog(const T1& x)
+KFR_INTRIN T1 clog(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::clog(x);
+ return internal::clog(x); // forwards to the free function internal::clog (formerly in_complex<>::clog)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_clog, E1> clog(E1&& x)
+KFR_INTRIN expr_func<internal::fn_clog, E1> clog(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_clog
{
- return { fn_clog(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_clog() temporary
}
-
-using fn_clog2 = internal::in_complex<>::fn_clog2;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> clog2(const T1& x)
+KFR_INTRIN T1 clog2(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::clog2(x);
+ return internal::clog2(x); // forwards to the free function internal::clog2 (formerly in_complex<>::clog2)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_clog2, E1> clog2(E1&& x)
+KFR_INTRIN expr_func<internal::fn_clog2, E1> clog2(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_clog2
{
- return { fn_clog2(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_clog2() temporary
}
-
-using fn_clog10 = internal::in_complex<>::fn_clog10;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> clog10(const T1& x)
+KFR_INTRIN T1 clog10(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::clog10(x);
+ return internal::clog10(x); // forwards to the free function internal::clog10 (formerly in_complex<>::clog10)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_clog10, E1> clog10(E1&& x)
+KFR_INTRIN expr_func<internal::fn_clog10, E1> clog10(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_clog10
{
- return { fn_clog10(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_clog10() temporary
}
-
-using fn_cexp = internal::in_complex<>::fn_cexp;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cexp(const T1& x)
+KFR_INTRIN T1 cexp(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::cexp(x);
+ return internal::cexp(x); // forwards to the free function internal::cexp (formerly in_complex<>::cexp)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cexp, E1> cexp(E1&& x)
+KFR_INTRIN expr_func<internal::fn_cexp, E1> cexp(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_cexp
{
- return { fn_cexp(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_cexp() temporary
}
-
-using fn_cexp2 = internal::in_complex<>::fn_cexp2;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cexp2(const T1& x)
+KFR_INTRIN T1 cexp2(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::cexp2(x);
+ return internal::cexp2(x); // forwards to the free function internal::cexp2 (formerly in_complex<>::cexp2)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cexp2, E1> cexp2(E1&& x)
+KFR_INTRIN expr_func<internal::fn_cexp2, E1> cexp2(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_cexp2
{
- return { fn_cexp2(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_cexp2() temporary
}
-
-using fn_cexp10 = internal::in_complex<>::fn_cexp10;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cexp10(const T1& x)
+KFR_INTRIN T1 cexp10(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::cexp10(x);
+ return internal::cexp10(x); // forwards to the free function internal::cexp10 (formerly in_complex<>::cexp10)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cexp10, E1> cexp10(E1&& x)
+KFR_INTRIN expr_func<internal::fn_cexp10, E1> cexp10(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_cexp10
{
- return { fn_cexp10(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_cexp10() temporary
}
-
-using fn_polar = internal::in_complex<>::fn_polar;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> polar(const T1& x)
+KFR_INTRIN T1 polar(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::polar(x);
+ return internal::polar(x); // forwards to the free function internal::polar (formerly in_complex<>::polar)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_polar, E1> polar(E1&& x)
+KFR_INTRIN expr_func<internal::fn_polar, E1> polar(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_polar
{
- return { fn_polar(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_polar() temporary
}
-
-using fn_cartesian = internal::in_complex<>::fn_cartesian;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cartesian(const T1& x)
+KFR_INTRIN T1 cartesian(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::cartesian(x);
+ return internal::cartesian(x); // forwards to the free function internal::cartesian (formerly in_complex<>::cartesian)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cartesian, E1> cartesian(E1&& x)
+KFR_INTRIN expr_func<internal::fn_cartesian, E1> cartesian(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_cartesian
{
- return { fn_cartesian(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_cartesian() temporary
}
-
-using fn_csqrt = internal::in_complex<>::fn_csqrt;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> csqrt(const T1& x)
+KFR_INTRIN T1 csqrt(const T1& x) // numeric overload; returns T1 (previously ftype<T1>)
{
- return internal::in_complex<>::csqrt(x);
+ return internal::csqrt(x); // forwards to the free function internal::csqrt (formerly in_complex<>::csqrt)
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_csqrt, E1> csqrt(E1&& x)
+KFR_INTRIN expr_func<internal::fn_csqrt, E1> csqrt(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_csqrt
{
- return { fn_csqrt(), std::forward<E1>(x) };
-}
-}
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_csqrt() temporary
}
-namespace cometa
-{
-template <typename T>
-struct compound_type_traits<kfr::complex<T>>
-{
- constexpr static size_t width = 2;
- using subtype = T;
- using deep_subtype = cometa::deep_subtype<T>;
- constexpr static bool is_scalar = false;
- template <typename U>
- using rebind = kfr::complex<U>;
- template <typename U>
- using deep_rebind = kfr::complex<cometa::deep_rebind<subtype, U>>;
-
- static constexpr subtype at(const kfr::complex<T>& value, size_t index)
- {
- return index == 0 ? value.real() : value.imag();
- }
-};
}
#pragma clang diagnostic pop
diff --git a/include/kfr/base/gamma.hpp b/include/kfr/base/gamma.hpp
@@ -41,63 +41,51 @@ constexpr T gamma_precalc[] = {
-0x1.62f981f01cf84p+8, 0x5.a937aa5c48d98p+0, -0x3.c640bf82e2104p-8, 0xc.914c540f959cp-24,
};
-template <cpu_t c = cpu_t::native, cpu_t cc = c>
-struct in_gamma : in_log_exp<cc>
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> gamma(vec<T, N> z) // gamma approximation as a free function (replaces in_gamma<>::gamma); NOTE(review): the series shape resembles a Lanczos-style approximation - confirm
{
-private:
- using in_log_exp<cc>::exp;
- using in_log_exp<cc>::pow;
-
-public:
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> gamma(vec<T, N> z)
- {
- constexpr size_t Count = arraysize(internal::gamma_precalc<T>);
- vec<T, N> accm = gamma_precalc<T>[0];
- KFR_LOOP_UNROLL
- for (size_t k = 1; k < Count; k++)
- accm += gamma_precalc<T>[k] / (z + cast<utype<T>>(k));
- accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5);
- return accm / z;
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> factorial_approx(vec<T, N> x)
- {
- return gamma(x + T(1));
- }
- KFR_SPEC_FN(in_gamma, gamma)
- KFR_SPEC_FN(in_gamma, factorial_approx)
-};
+ constexpr size_t Count = arraysize(gamma_precalc<T>); // number of precomputed coefficients in the gamma_precalc<T> table
+ vec<T, N> accm = gamma_precalc<T>[0]; // accumulator starts from coefficient 0
+ KFR_LOOP_UNROLL
+ for (size_t k = 1; k < Count; k++) // remaining coefficients, k = 1 .. Count-1
+ accm += gamma_precalc<T>[k] / (z + cast<utype<T>>(k)); // add coefficient k divided by (z + k)
+ accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5); // scale by e^-(z+Count) * (z+Count)^(z+0.5)
+ return accm / z; // final division by z
}
-namespace native
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> factorial_approx(vec<T, N> x) // approximate factorial expressed through gamma
{
-using fn_gamma = internal::in_gamma<>::fn_gamma;
+ return gamma(x + T(1)); // evaluates gamma at x + 1
+}
+KFR_HANDLE_SCALAR(gamma) // presumably generates the scalar-argument overloads called by the public wrappers - macro body is not visible here
+KFR_HANDLE_SCALAR(factorial_approx)
+KFR_FN(gamma) // presumably defines the fn_gamma / fn_factorial_approx functor types referenced as internal::fn_* - confirm
+KFR_FN(factorial_approx)
+}
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> gamma(const T1& x)
+KFR_INTRIN T1 gamma(const T1& x) // numeric overload; NOTE(review): return type is now T1 rather than ftype<T1>, so an integer T1 would get an integer result - confirm intended
{
- return internal::in_gamma<>::gamma(x);
+ return internal::gamma(x); // forwards to the free function internal::gamma (formerly in_gamma<>::gamma)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_gamma, E1> gamma(E1&& x)
+KFR_INTRIN expr_func<internal::fn_gamma, E1> gamma(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_gamma
{
- return { fn_gamma(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_gamma() temporary
}
-using fn_factorial_approx = internal::in_gamma<>::fn_factorial_approx;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> factorial_approx(const T1& x)
+KFR_INTRIN T1 factorial_approx(const T1& x) // numeric overload; NOTE(review): return type is now T1 rather than ftype<T1>, so an integer T1 would get an integer result - confirm intended
{
- return internal::in_gamma<>::factorial_approx(x);
+ return internal::factorial_approx(x); // forwards to the free function internal::factorial_approx (formerly in_gamma<>::factorial_approx)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_factorial_approx, E1> factorial_approx(E1&& x)
+KFR_INTRIN expr_func<internal::fn_factorial_approx, E1> factorial_approx(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_factorial_approx
{
- return { fn_factorial_approx(), std::forward<E1>(x) };
-}
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_factorial_approx() temporary
}
}
diff --git a/include/kfr/base/hyperbolic.hpp b/include/kfr/base/hyperbolic.hpp
@@ -35,149 +35,131 @@ namespace kfr
namespace internal
{
-template <cpu_t c = cpu_t::native>
-struct in_hyperbolic : in_log_exp<c>
-{
- constexpr static cpu_t cur = c;
-
-private:
- using in_log_exp<c>::exp;
-
-public:
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> sinh(vec<T, N> x)
- {
- return (exp(x) - exp(-x)) * T(0.5);
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> cosh(vec<T, N> x)
- {
- return (exp(x) + exp(-x)) * T(0.5);
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> tanh(vec<T, N> x)
- {
- x = -2 * x;
- return (1 - exp(x)) / (1 + exp(x));
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> coth(vec<T, N> x)
- {
- x = -2 * x;
- return (1 + exp(x)) / (1 - exp(x));
- }
-
- template <typename T, size_t N, KFR_ENABLE_IF(N > 1)>
- KFR_SINTRIN vec<T, N> sinhcosh(vec<T, N> x)
- {
- const vec<T, N> a = exp(x);
- const vec<T, N> b = exp(-x);
- return subadd(a, b) * T(0.5);
- }
-
- template <typename T, size_t N, KFR_ENABLE_IF(N > 1)>
- KFR_SINTRIN vec<T, N> coshsinh(vec<T, N> x)
- {
- const vec<T, N> a = exp(x);
- const vec<T, N> b = exp(-x);
- return addsub(a, b) * T(0.5);
- }
- KFR_HANDLE_SCALAR(sinh)
- KFR_HANDLE_SCALAR(cosh)
- KFR_HANDLE_SCALAR(tanh)
- KFR_HANDLE_SCALAR(coth)
- KFR_HANDLE_SCALAR(sinhcosh)
- KFR_HANDLE_SCALAR(coshsinh)
- KFR_SPEC_FN(in_hyperbolic, sinh)
- KFR_SPEC_FN(in_hyperbolic, cosh)
- KFR_SPEC_FN(in_hyperbolic, tanh)
- KFR_SPEC_FN(in_hyperbolic, coth)
- KFR_SPEC_FN(in_hyperbolic, sinhcosh)
- KFR_SPEC_FN(in_hyperbolic, coshsinh)
-};
-}
-
-namespace native
-{
-using fn_sinh = internal::in_hyperbolic<>::fn_sinh;
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> sinh(vec<T, N> x) // hyperbolic sine, built from two exp evaluations
+{
+ return (exp(x) - exp(-x)) * T(0.5); // (e^x - e^-x) / 2
+}
+
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> cosh(vec<T, N> x) // hyperbolic cosine, built from two exp evaluations
+{
+ return (exp(x) + exp(-x)) * T(0.5); // (e^x + e^-x) / 2
+}
+
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> tanh(vec<T, N> x) // hyperbolic tangent expressed through exp(-2x)
+{
+ x = -2 * x; // from here on x holds -2 times the input
+ return (1 - exp(x)) / (1 + exp(x)); // (1 - e^-2x) / (1 + e^-2x) in terms of the original input
+}
+
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> coth(vec<T, N> x) // hyperbolic cotangent expressed through exp(-2x)
+{
+ x = -2 * x; // from here on x holds -2 times the input
+ return (1 + exp(x)) / (1 - exp(x)); // (1 + e^-2x) / (1 - e^-2x) in terms of the original input
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> // only enabled for vectors wider than one element
+KFR_SINTRIN vec<T, N> sinhcosh(vec<T, N> x) // combined sinh/cosh over alternating lanes
+{
+ const vec<T, N> a = exp(x); // e^x
+ const vec<T, N> b = exp(-x); // e^-x
+ return subadd(a, b) * T(0.5); // presumably a-b in even lanes (sinh) and a+b in odd lanes (cosh), halved - confirm subadd lane order
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> // only enabled for vectors wider than one element
+KFR_SINTRIN vec<T, N> coshsinh(vec<T, N> x) // combined cosh/sinh over alternating lanes
+{
+ const vec<T, N> a = exp(x); // e^x
+ const vec<T, N> b = exp(-x); // e^-x
+ return addsub(a, b) * T(0.5); // presumably a+b in even lanes (cosh) and a-b in odd lanes (sinh), halved - confirm addsub lane order
+}
+
+KFR_HANDLE_SCALAR(sinh) // one KFR_HANDLE_SCALAR per function above; presumably generates the scalar-argument overloads called by the public wrappers - macro body is not visible here
+KFR_HANDLE_SCALAR(cosh)
+KFR_HANDLE_SCALAR(tanh)
+KFR_HANDLE_SCALAR(coth)
+KFR_HANDLE_SCALAR(sinhcosh)
+KFR_HANDLE_SCALAR(coshsinh)
+KFR_FN(sinh) // one KFR_FN per function; presumably defines the fn_<name> functor types referenced as internal::fn_<name> - confirm
+KFR_FN(cosh)
+KFR_FN(tanh)
+KFR_FN(coth)
+KFR_FN(sinhcosh)
+KFR_FN(coshsinh)
+}
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sinh(const T1& x)
+KFR_INTRIN T1 sinh(const T1& x) // numeric overload; NOTE(review): return type is now T1 rather than ftype<T1>, so an integer T1 would get an integer result - confirm intended
{
- return internal::in_hyperbolic<>::sinh(x);
+ return internal::sinh(x); // forwards to the free function internal::sinh (formerly in_hyperbolic<>::sinh)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sinh, E1> sinh(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sinh, E1> sinh(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_sinh
{
- return { fn_sinh(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_sinh() temporary
}
-using fn_cosh = internal::in_hyperbolic<>::fn_cosh;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cosh(const T1& x)
+KFR_INTRIN T1 cosh(const T1& x) // numeric overload; NOTE(review): return type is now T1 rather than ftype<T1>, so an integer T1 would get an integer result - confirm intended
{
- return internal::in_hyperbolic<>::cosh(x);
+ return internal::cosh(x); // forwards to the free function internal::cosh (formerly in_hyperbolic<>::cosh)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cosh, E1> cosh(E1&& x)
+KFR_INTRIN expr_func<internal::fn_cosh, E1> cosh(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_cosh
{
- return { fn_cosh(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_cosh() temporary
}
-using fn_tanh = internal::in_hyperbolic<>::fn_tanh;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> tanh(const T1& x)
+KFR_INTRIN T1 tanh(const T1& x) // numeric overload; NOTE(review): return type is now T1 rather than ftype<T1>, so an integer T1 would get an integer result - confirm intended
{
- return internal::in_hyperbolic<>::tanh(x);
+ return internal::tanh(x); // forwards to the free function internal::tanh (formerly in_hyperbolic<>::tanh)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_tanh, E1> tanh(E1&& x)
+KFR_INTRIN expr_func<internal::fn_tanh, E1> tanh(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_tanh
{
- return { fn_tanh(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_tanh() temporary
}
-using fn_coth = internal::in_hyperbolic<>::fn_coth;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> coth(const T1& x)
+KFR_INTRIN T1 coth(const T1& x) // numeric overload; NOTE(review): return type is now T1 rather than ftype<T1>, so an integer T1 would get an integer result - confirm intended
{
- return internal::in_hyperbolic<>::coth(x);
+ return internal::coth(x); // forwards to the free function internal::coth (formerly in_hyperbolic<>::coth)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_coth, E1> coth(E1&& x)
+KFR_INTRIN expr_func<internal::fn_coth, E1> coth(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_coth
{
- return { fn_coth(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_coth() temporary
}
-using fn_sinhcosh = internal::in_hyperbolic<>::fn_sinhcosh;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sinhcosh(const T1& x)
+KFR_INTRIN T1 sinhcosh(const T1& x) // numeric overload, now returning T1 instead of ftype<T1>; NOTE(review): the vec overload of internal::sinhcosh is constrained to N > 1 - confirm this is callable with a plain scalar T1
{
- return internal::in_hyperbolic<>::sinhcosh(x);
+ return internal::sinhcosh(x); // forwards to the free function internal::sinhcosh (formerly in_hyperbolic<>::sinhcosh)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sinhcosh, E1> sinhcosh(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sinhcosh, E1> sinhcosh(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_sinhcosh
{
- return { fn_sinhcosh(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_sinhcosh() temporary
}
-using fn_coshsinh = internal::in_hyperbolic<>::fn_coshsinh;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> coshsinh(const T1& x)
+KFR_INTRIN T1 coshsinh(const T1& x) // numeric overload, now returning T1 instead of ftype<T1>; NOTE(review): the vec overload of internal::coshsinh is constrained to N > 1 - confirm this is callable with a plain scalar T1
{
- return internal::in_hyperbolic<>::coshsinh(x);
+ return internal::coshsinh(x); // forwards to the free function internal::coshsinh (formerly in_hyperbolic<>::coshsinh)
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_coshsinh, E1> coshsinh(E1&& x)
+KFR_INTRIN expr_func<internal::fn_coshsinh, E1> coshsinh(E1&& x) // expression overload; yields an expr_func tagged with internal::fn_coshsinh
{
- return { fn_coshsinh(), std::forward<E1>(x) };
-}
+ return { {}, std::forward<E1>(x) }; // {} value-initializes the first member in place of the old fn_coshsinh() temporary
}
}
diff --git a/include/kfr/base/log_exp.hpp b/include/kfr/base/log_exp.hpp
@@ -26,518 +26,490 @@
#include "constants.hpp"
#include "function.hpp"
#include "min_max.hpp"
+#include "clamp.hpp"
#include "operators.hpp"
#include "round.hpp"
#include "select.hpp"
#include "shuffle.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
namespace internal
{
-template <cpu_t c = cpu_t::native>
-struct in_log_exp : in_select<c>, in_min_max<c>, in_clamp<c>, in_round<c>, in_abs<c>
-{
-private:
- constexpr static cpu_t cur = c;
- using in_select<c>::select;
- using in_round<c>::floor;
- using in_clamp<c>::clamp;
- using in_abs<c>::abs;
-
-public:
- template <size_t N>
- KFR_SINTRIN vec<i32, N> vilogbp1(vec<f32, N> d)
- {
- mask<i32, N> m = d < 5.421010862427522E-20f;
- d = select(m, 1.8446744073709552E19f * d, d);
- vec<i32, N> q = (ibitcast(d) >> 23) & 0xff;
- q = select(m, q - (64 + 0x7e), q - 0x7e);
- return q;
- }
-
- template <size_t N>
- KFR_SINTRIN vec<i64, N> vilogbp1(vec<f64, N> d)
- {
- mask<i64, N> m = d < 4.9090934652977266E-91;
- d = select(m, 2.037035976334486E90 * d, d);
- vec<i64, N> q = (ibitcast(d) >> 52) & 0x7ff;
- q = select(m, q - (300 + 0x03fe), q - 0x03fe);
- return q;
- }
-
- template <size_t N>
- KFR_SINTRIN vec<f32, N> vldexpk(vec<f32, N> x, vec<i32, N> q)
- {
- vec<i32, N> m = q >> 31;
- m = (((m + q) >> 6) - m) << 4;
- q = q - (m << 2);
- m = clamp(m + 0x7f, vec<i32, N>(0xff));
- vec<f32, N> u = pow4(bitcast<f32>(cast<i32>(m) << 23));
- return x * u * bitcast<f32>((cast<i32>(q + 0x7f)) << 23);
- }
-
- template <size_t N>
- KFR_SINTRIN vec<f64, N> vldexpk(vec<f64, N> x, vec<i64, N> q)
- {
- vec<i64, N> m = q >> 31;
- m = (((m + q) >> 9) - m) << 7;
- q = q - (m << 2);
- m = clamp(m + 0x3ff, i64(0x7ff));
- vec<f64, N> u = pow4(bitcast<f64>(cast<i64>(m) << 52));
- return x * u * bitcast<f64>((cast<i64>(q + 0x3ff)) << 52);
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> logb(vec<T, N> x)
- {
- return select(x == T(), -c_infinity<T>, cast<T>(vilogbp1(x) - 1));
- }
-
- template <size_t N>
- KFR_SINTRIN vec<f32, N> log(vec<f32, N> d)
- {
- vec<i32, N> e = vilogbp1(d * 0.7071); // 0678118654752440084436210485f );
- vec<f32, N> m = vldexpk(d, -e);
-
- vec<f32, N> x = (m - 1.0f) / (m + 1.0f);
- vec<f32, N> x2 = x * x;
-
- vec<f32, N> sp = select(d < 0, c_qnan<f32>, c_neginfinity<f32>);
-
- vec<f32, N> t = 0.2371599674224853515625f;
- t = fmadd(t, x2, 0.285279005765914916992188f);
- t = fmadd(t, x2, 0.400005519390106201171875f);
- t = fmadd(t, x2, 0.666666567325592041015625f);
- t = fmadd(t, x2, 2.0f);
-
- x = x * t + c_log_2<f32> * cast<f32>(e);
- x = select(d > 0, x, sp);
-
- return x;
- }
-
- template <size_t N>
- KFR_SINTRIN vec<f64, N> log(vec<f64, N> d)
- {
- vec<i64, N> e = vilogbp1(d * 0.7071); // 0678118654752440084436210485 );
- vec<f64, N> m = vldexpk(d, -e);
-
- vec<f64, N> x = (m - 1.0) / (m + 1.0);
- vec<f64, N> x2 = x * x;
-
- vec<f64, N> sp = select(d < 0, c_qnan<f64>, c_neginfinity<f64>);
-
- vec<f64, N> t = 0.148197055177935105296783;
- t = fmadd(t, x2, 0.153108178020442575739679);
- t = fmadd(t, x2, 0.181837339521549679055568);
- t = fmadd(t, x2, 0.22222194152736701733275);
- t = fmadd(t, x2, 0.285714288030134544449368);
- t = fmadd(t, x2, 0.399999999989941956712869);
- t = fmadd(t, x2, 0.666666666666685503450651);
- t = fmadd(t, x2, 2);
-
- x = x * t + c_log_2<f64> * cast<f64>(e);
- x = select(d > 0, x, sp);
-
- return x;
- }
-
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> log2(vec<T, N> x)
- {
- return log(x) * c_recip_log_2<T>;
- }
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> log10(vec<T, N> x)
- {
- return log(x) * c_recip_log_10<T>;
- }
-
- template <size_t N>
- KFR_SINTRIN vec<f32, N> exp(vec<f32, N> d)
- {
- const f32 ln2_part1 = 0.6931457519f;
- const f32 ln2_part2 = 1.4286067653e-6f;
-
- vec<i32, N> q = cast<i32>(floor(d * c_recip_log_2<f32>));
- vec<f32, N> s, u;
-
- s = fmadd(cast<f32>(q), -ln2_part1, d);
- s = fmadd(cast<f32>(q), -ln2_part2, s);
-
- const f32 c2 = 0.4999999105930328369140625f;
- const f32 c3 = 0.166668415069580078125f;
- const f32 c4 = 4.16539050638675689697265625e-2f;
- const f32 c5 = 8.378830738365650177001953125e-3f;
- const f32 c6 = 1.304379315115511417388916015625e-3f;
- const f32 c7 = 2.7555381529964506626129150390625e-4f;
-
- u = c7;
- u = fmadd(u, s, c6);
- u = fmadd(u, s, c5);
- u = fmadd(u, s, c4);
- u = fmadd(u, s, c3);
- u = fmadd(u, s, c2);
-
- u = s * s * u + s + 1.0f;
- u = vldexpk(u, q);
-
- u = select(d == c_neginfinity<f32>, 0.f, u);
-
- return u;
- }
-
- template <size_t N>
- KFR_SINTRIN vec<f64, N> exp(vec<f64, N> d)
- {
- const f64 ln2_part1 = 0.69314717501401901245;
- const f64 ln2_part2 = 5.545926273775592108e-009;
-
- vec<i64, N> q = cast<i64>(floor(d * c_recip_log_2<f64>));
- vec<f64, N> s, u;
-
- s = fmadd(cast<f64>(q), -ln2_part1, d);
- s = fmadd(cast<f64>(q), -ln2_part2, s);
-
- const f64 c2 = 0.499999999999994948485237955537741072475910186767578;
- const f64 c3 = 0.166666666667024204739888659787538927048444747924805;
- const f64 c4 = 4.16666666578945840693215529881854308769106864929199e-2;
- const f64 c5 = 8.3333334397461874404333670440792047884315252304077e-3;
- const f64 c6 = 1.3888881489747750223179290074426717183087021112442e-3;
- const f64 c7 = 1.9841587032493949419205414574918222569976933300495e-4;
- const f64 c8 = 2.47929324077393282239802768662784160369483288377523e-5;
- const f64 c9 = 2.77076037925831049422552981864598109496000688523054e-6;
- const f64 c10 = 2.59589616274586264243611237120812340606335055781528e-7;
- const f64 c11 = 3.43801438838789632454461529017381016259946591162588e-8;
-
- u = c11;
- u = fmadd(u, s, c10);
- u = fmadd(u, s, c9);
- u = fmadd(u, s, c8);
- u = fmadd(u, s, c7);
- u = fmadd(u, s, c6);
- u = fmadd(u, s, c5);
- u = fmadd(u, s, c4);
- u = fmadd(u, s, c3);
- u = fmadd(u, s, c2);
-
- u = s * s * u + s + 1.0;
- u = vldexpk(u, q);
-
- u = select(d == c_neginfinity<f64>, 0.0, u);
-
- return u;
- }
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> exp2(vec<T, N> x)
- {
- return exp(x * c_log_2<T>);
- }
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> exp10(vec<T, N> x)
- {
- return exp(x * c_log_10<T>);
- }
-
- template <typename T1, typename T2>
- KFR_SINTRIN common_type<T1, T2> logn(const T1& a, const T2& b)
- {
- return log(a) / log(b);
- }
-
- template <typename T1, typename T2>
- KFR_SINTRIN common_type<T1, T2> logm(const T1& a, const T2& b)
- {
- return log(a) * b;
- }
-
- template <typename T1, typename T2, typename T3>
- KFR_SINTRIN common_type<T1, T2, T3> exp_fmadd(const T1& x, const T2& m, const T3& a)
- {
- return exp(fmadd(x, m, a));
- }
-
- template <typename T1, typename T2, typename T3>
- KFR_SINTRIN common_type<T1, T2, T3> log_fmadd(const T1& x, const T2& m, const T3& a)
- {
- return fmadd(log(x), m, a);
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> pow(vec<T, N> a, vec<T, N> b)
- {
- const vec<T, N> t = exp(b * log(abs(a)));
- const mask<T, N> isint = floor(b) == b;
- const mask<T, N> iseven = (cast<itype<T>>(b) & 1) == 0;
- return select(a > T(), t,
- select(a == T(), T(1), select(isint, select(iseven, t, -t), broadcast<N>(c_qnan<T>))));
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> root(vec<T, N> x, vec<T, N> b)
- {
- return exp(reciprocal(b) * log(x));
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> cbrt(vec<T, N> x)
- {
- return pow<T, N>(x, T(0.333333333333333333333333333333333));
- }
-
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> exp(vec<T, N> x)
- {
- return exp(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> exp2(vec<T, N> x)
- {
- return exp2(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> exp10(vec<T, N> x)
- {
- return exp10(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> log(vec<T, N> x)
- {
- return log(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> log2(vec<T, N> x)
- {
- return log2(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> log10(vec<T, N> x)
- {
- return log10(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> cbrt(vec<T, N> x)
- {
- return cbrt(cast<Tout>(x));
- }
-
- KFR_HANDLE_SCALAR(exp)
- KFR_HANDLE_SCALAR(exp2)
- KFR_HANDLE_SCALAR(exp10)
- KFR_HANDLE_SCALAR(log)
- KFR_HANDLE_SCALAR(log2)
- KFR_HANDLE_SCALAR(log10)
- KFR_HANDLE_SCALAR(logb)
- KFR_HANDLE_SCALAR(pow)
- KFR_HANDLE_SCALAR(root)
- KFR_HANDLE_SCALAR(cbrt)
-
- KFR_SPEC_FN(in_log_exp, exp)
- KFR_SPEC_FN(in_log_exp, exp2)
- KFR_SPEC_FN(in_log_exp, exp10)
- KFR_SPEC_FN(in_log_exp, log)
- KFR_SPEC_FN(in_log_exp, log2)
- KFR_SPEC_FN(in_log_exp, log10)
- KFR_SPEC_FN(in_log_exp, logb)
- KFR_SPEC_FN(in_log_exp, logn)
- KFR_SPEC_FN(in_log_exp, logm)
- KFR_SPEC_FN(in_log_exp, exp_fmadd)
- KFR_SPEC_FN(in_log_exp, log_fmadd)
- KFR_SPEC_FN(in_log_exp, pow)
- KFR_SPEC_FN(in_log_exp, root)
- KFR_SPEC_FN(in_log_exp, cbrt)
-};
-}
-namespace native
-{
-using fn_exp = internal::in_log_exp<>::fn_exp;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> exp(const T1& x)
+template <size_t N>
+KFR_SINTRIN vec<i32, N> vilogbp1(vec<f32, N> d) // exponent field of d minus 0x7e, per lane (assuming IEEE single: binary exponent + 1)
{
- return internal::in_log_exp<>::exp(x);
+    const mask<i32, N> tiny = d < 5.421010862427522E-20f; // lanes below 2^-64
+    d = select(tiny, 1.8446744073709552E19f * d, d); // lift those lanes by 2^64 before reading the exponent
+    const vec<i32, N> field = (ibitcast(d) >> 23) & 0xff; // the 8 bits above the 23-bit mantissa
+    const vec<i32, N> result = select(tiny, field - (64 + 0x7e), field - 0x7e); // lifted lanes give back the extra 64
+    return result;
}
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_exp, E1> exp(E1&& x)
+template <size_t N>
+KFR_SINTRIN vec<i64, N> vilogbp1(vec<f64, N> d) // exponent field of d minus 0x3fe, per lane (assuming IEEE double: binary exponent + 1)
{
- return { fn_exp(), std::forward<E1>(x) };
+    const mask<i64, N> tiny = d < 4.9090934652977266E-91; // lanes below 2^-300
+    d = select(tiny, 2.037035976334486E90 * d, d); // lift those lanes by 2^300 before reading the exponent
+    const vec<i64, N> field = (ibitcast(d) >> 52) & 0x7ff; // the 11 bits above the 52-bit mantissa
+    const vec<i64, N> result = select(tiny, field - (300 + 0x03fe), field - 0x03fe); // lifted lanes give back the extra 300
+    return result;
}
-using fn_exp2 = internal::in_log_exp<>::fn_exp2;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> exp2(const T1& x)
+template <size_t N>
+KFR_SINTRIN vec<f32, N> vldexpk(vec<f32, N> x, vec<i32, N> q) // multiplies x by two power-of-two factors built from q (presumably x * 2^q overall)
{
- return internal::in_log_exp<>::exp2(x);
+ vec<i32, N> m = q >> 31; // sign of q spread over the lane (assuming an arithmetic shift): -1 or 0
+ m = (((m + q) >> 6) - m) << 4; // coarse share of the exponent, always a multiple of 16
+ q = q - (m << 2); // what is left of q once 4 * m has been taken out
+ m = clamp(m + 0x7f, vec<i32, N>(0xff)); // add the bias 0x7f; clamp presumably bounds it to the 8-bit exponent range
+ vec<f32, N> u = pow4(bitcast<f32>(cast<i32>(m) << 23)); // m placed in the exponent field; pow4 presumably raises that to the 4th power
+ return x * u * bitcast<f32>((cast<i32>(q + 0x7f)) << 23); // second factor: remaining q placed in the exponent field
}
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_exp2, E1> exp2(E1&& x)
+template <size_t N>
+KFR_SINTRIN vec<f64, N> vldexpk(vec<f64, N> x, vec<i64, N> q) // double-precision variant: multiplies x by two power-of-two factors built from q
+{
+ vec<i64, N> m = q >> 31; // NOTE(review): shift count is 31 on 64-bit lanes; gives -1 / 0 only while |q| < 2^31 - confirm intended
+ m = (((m + q) >> 9) - m) << 7; // coarse share of the exponent, always a multiple of 128
+ q = q - (m << 2); // what is left of q once 4 * m has been taken out
+ m = clamp(m + 0x3ff, i64(0x7ff)); // add the bias 0x3ff; clamp presumably bounds it to the 11-bit exponent range
+ vec<f64, N> u = pow4(bitcast<f64>(cast<i64>(m) << 52)); // m placed in the exponent field; pow4 presumably raises that to the 4th power
+ return x * u * bitcast<f64>((cast<i64>(q + 0x3ff)) << 52); // second factor: remaining q placed in the exponent field
+}
+
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> logb(vec<T, N> x) // vilogbp1(x) - 1 converted to T; lanes equal to zero give -infinity
+{
+ return select(x == T(), -c_infinity<T>, cast<T>(vilogbp1(x) - 1));
+}
+
+template <size_t N>
+KFR_SINTRIN vec<f32, N> log(vec<f32, N> d) // single-precision natural logarithm, lane by lane
+{
+    const vec<i32, N> expo = vilogbp1(d * 0.7071); // 0.7071 is a truncated 1/sqrt(2); dropped digits: 0678118654752440084436210485
+    const vec<f32, N> mant = vldexpk(d, -expo); // d with the extracted power of two taken out
+
+    const vec<f32, N> z = (mant - 1.0f) / (mant + 1.0f);
+    const vec<f32, N> z2 = z * z;
+
+    const vec<f32, N> special = select(d < 0, c_qnan<f32>, c_neginfinity<f32>); // used when d is not > 0
+
+    vec<f32, N> poly = 0.2371599674224853515625f; // polynomial in z2, highest coefficient first
+    poly = fmadd(poly, z2, 0.285279005765914916992188f);
+    poly = fmadd(poly, z2, 0.400005519390106201171875f);
+    poly = fmadd(poly, z2, 0.666666567325592041015625f);
+    poly = fmadd(poly, z2, 2.0f);
+
+    vec<f32, N> result = z * poly + c_log_2<f32> * cast<f32>(expo); // add back the exponent's contribution
+    result = select(d > 0, result, special); // negative lanes -> qnan, remaining non-positive lanes -> -infinity
+
+    return result;
+}
+
+template <size_t N>
+KFR_SINTRIN vec<f64, N> log(vec<f64, N> d) // double-precision natural logarithm, lane by lane
+{
+    const vec<i64, N> expo = vilogbp1(d * 0.7071); // 0.7071 is a truncated 1/sqrt(2); dropped digits: 0678118654752440084436210485
+    const vec<f64, N> mant = vldexpk(d, -expo); // d with the extracted power of two taken out
+
+    const vec<f64, N> z = (mant - 1.0) / (mant + 1.0);
+    const vec<f64, N> z2 = z * z;
+
+    const vec<f64, N> special = select(d < 0, c_qnan<f64>, c_neginfinity<f64>); // used when d is not > 0
+
+    vec<f64, N> poly = 0.148197055177935105296783; // polynomial in z2, highest coefficient first
+    poly = fmadd(poly, z2, 0.153108178020442575739679);
+    poly = fmadd(poly, z2, 0.181837339521549679055568);
+    poly = fmadd(poly, z2, 0.22222194152736701733275);
+    poly = fmadd(poly, z2, 0.285714288030134544449368);
+    poly = fmadd(poly, z2, 0.399999999989941956712869);
+    poly = fmadd(poly, z2, 0.666666666666685503450651);
+    poly = fmadd(poly, z2, 2);
+
+    vec<f64, N> result = z * poly + c_log_2<f64> * cast<f64>(expo); // add back the exponent's contribution
+    result = select(d > 0, result, special); // negative lanes -> qnan, remaining non-positive lanes -> -infinity
+
+    return result;
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> log2(vec<T, N> x) // natural log scaled by c_recip_log_2 (presumably 1 / ln 2)
+{
+ return log(x) * c_recip_log_2<T>;
+}
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> log10(vec<T, N> x) // natural log scaled by c_recip_log_10 (presumably 1 / ln 10)
+{
+ return log(x) * c_recip_log_10<T>;
+}
+
+template <size_t N>
+KFR_SINTRIN vec<f32, N> exp(vec<f32, N> d) // single-precision e^d, lane by lane
+{
+    const f32 ln2_hi = 0.6931457519f; // ln 2 is applied in two pieces during the reduction below
+    const f32 ln2_lo = 1.4286067653e-6f;
+
+    vec<i32, N> k = cast<i32>(floor(d * c_recip_log_2<f32>)); // whole multiples of ln 2 in d (c_recip_log_2 presumably 1 / ln 2)
+    vec<f32, N> r, p;
+
+    r = fmadd(cast<f32>(k), -ln2_hi, d); // r = d - k * ln 2, done in two fused steps
+    r = fmadd(cast<f32>(k), -ln2_lo, r);
+
+    const f32 a2 = 0.4999999105930328369140625f;
+    const f32 a3 = 0.166668415069580078125f;
+    const f32 a4 = 4.16539050638675689697265625e-2f;
+    const f32 a5 = 8.378830738365650177001953125e-3f;
+    const f32 a6 = 1.304379315115511417388916015625e-3f;
+    const f32 a7 = 2.7555381529964506626129150390625e-4f;
+
+    p = a7; // Horner evaluation, highest coefficient first
+    p = fmadd(p, r, a6);
+    p = fmadd(p, r, a5);
+    p = fmadd(p, r, a4);
+    p = fmadd(p, r, a3);
+    p = fmadd(p, r, a2);
+
+    p = r * r * p + r + 1.0f; // 1 + r + r^2 * poly(r)
+    p = vldexpk(p, k); // reapply the k extracted above
+
+    p = select(d == c_neginfinity<f32>, 0.f, p); // -infinity input gives exactly 0
+
+    return p;
+}
+
+template <size_t N>
+KFR_SINTRIN vec<f64, N> exp(vec<f64, N> d) // double-precision e^d, lane by lane
+{
+    const f64 ln2_hi = 0.69314717501401901245; // ln 2 is applied in two pieces during the reduction below
+    const f64 ln2_lo = 5.545926273775592108e-009;
+
+    vec<i64, N> k = cast<i64>(floor(d * c_recip_log_2<f64>)); // whole multiples of ln 2 in d (c_recip_log_2 presumably 1 / ln 2)
+    vec<f64, N> r, p;
+
+    r = fmadd(cast<f64>(k), -ln2_hi, d); // r = d - k * ln 2, done in two fused steps
+    r = fmadd(cast<f64>(k), -ln2_lo, r);
+
+    const f64 a2 = 0.499999999999994948485237955537741072475910186767578;
+    const f64 a3 = 0.166666666667024204739888659787538927048444747924805;
+    const f64 a4 = 4.16666666578945840693215529881854308769106864929199e-2;
+    const f64 a5 = 8.3333334397461874404333670440792047884315252304077e-3;
+    const f64 a6 = 1.3888881489747750223179290074426717183087021112442e-3;
+    const f64 a7 = 1.9841587032493949419205414574918222569976933300495e-4;
+    const f64 a8 = 2.47929324077393282239802768662784160369483288377523e-5;
+    const f64 a9 = 2.77076037925831049422552981864598109496000688523054e-6;
+    const f64 a10 = 2.59589616274586264243611237120812340606335055781528e-7;
+    const f64 a11 = 3.43801438838789632454461529017381016259946591162588e-8;
+
+    p = a11; // Horner evaluation, highest coefficient first
+    p = fmadd(p, r, a10);
+    p = fmadd(p, r, a9);
+    p = fmadd(p, r, a8);
+    p = fmadd(p, r, a7);
+    p = fmadd(p, r, a6);
+    p = fmadd(p, r, a5);
+    p = fmadd(p, r, a4);
+    p = fmadd(p, r, a3);
+    p = fmadd(p, r, a2);
+
+    p = r * r * p + r + 1.0; // 1 + r + r^2 * poly(r)
+    p = vldexpk(p, k); // reapply the k extracted above
+
+    p = select(d == c_neginfinity<f64>, 0.0, p); // -infinity input gives exactly 0
+
+    return p;
+}
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> exp2(vec<T, N> x) // exp of x scaled by c_log_2 (presumably ln 2), i.e. 2^x
+{
+ return exp(x * c_log_2<T>);
+}
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> exp10(vec<T, N> x) // exp of x scaled by c_log_10 (presumably ln 10), i.e. 10^x
+{
+ return exp(x * c_log_10<T>);
+}
+
+template <typename T1, typename T2>
+KFR_SINTRIN common_type<T1, T2> logn(const T1& a, const T2& b) // logarithm of a to base b: log(a) / log(b)
+{
+ return log(a) / log(b);
+}
+
+template <typename T1, typename T2>
+KFR_SINTRIN common_type<T1, T2> logm(const T1& a, const T2& b) // natural log of a multiplied by b
+{
+ return log(a) * b;
+}
+
+template <typename T1, typename T2, typename T3>
+KFR_SINTRIN common_type<T1, T2, T3> exp_fmadd(const T1& x, const T2& m, const T3& a) // exp applied to fmadd(x, m, a) (presumably x * m + a)
{
- return { fn_exp2(), std::forward<E1>(x) };
+ return exp(fmadd(x, m, a));
+}
+
+template <typename T1, typename T2, typename T3>
+KFR_SINTRIN common_type<T1, T2, T3> log_fmadd(const T1& x, const T2& m, const T3& a) // fmadd(log(x), m, a) (presumably log(x) * m + a)
+{
+ return fmadd(log(x), m, a);
+}
+
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> pow(vec<T, N> a, vec<T, N> b) // a raised to the power b, lane by lane
+{
+    const vec<T, N> t = exp(b * log(abs(a))); // |a|^b; the sign for negative a is restored below
+    const mask<T, N> isint = floor(b) == b; // b holds an integer value
+    const mask<T, N> iseven = (cast<itype<T>>(b) & 1) == 0; // and that integer is even
+    const vec<T, N> zerobase = select(b == T(), T(1), select(b < T(), c_infinity<T>, broadcast<N>(T()))); // 0^0 = 1, 0^negative = +infinity, 0^positive = 0 (was 1 for every b)
+    return select(a > T(), t, select(a == T(), zerobase, select(isint, select(iseven, t, -t), broadcast<N>(c_qnan<T>)))); // a < 0: signed result for integer b, qnan otherwise
+}
+
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> root(vec<T, N> x, vec<T, N> b) // b-th root of x computed as exp(log(x) / b); no special handling of x <= 0 here
+{
+ return exp(reciprocal(b) * log(x));
+}
+
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> cbrt(vec<T, N> x) // cube root as pow(x, 1/3); NOTE(review): negative x looks like it hits pow's non-integer-exponent qnan branch - confirm
+{
+ return pow<T, N>(x, T(0.333333333333333333333333333333333));
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> exp(vec<T, N> x) // element types failing is_f_class: convert to ftype<T>, then call the overload for that type
+{
+ return exp(cast<Tout>(x));
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> exp2(vec<T, N> x) // same conversion pattern for exp2
+{
+ return exp2(cast<Tout>(x));
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> exp10(vec<T, N> x) // same conversion pattern for exp10
+{
+ return exp10(cast<Tout>(x));
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> log(vec<T, N> x) // same conversion pattern for log
+{
+ return log(cast<Tout>(x));
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> log2(vec<T, N> x) // same conversion pattern for log2
+{
+ return log2(cast<Tout>(x));
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> log10(vec<T, N> x) // same conversion pattern for log10
+{
+ return log10(cast<Tout>(x));
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> cbrt(vec<T, N> x) // same conversion pattern for cbrt
+{
+ return cbrt(cast<Tout>(x));
+}
+
+KFR_HANDLE_SCALAR(exp) // macro defined elsewhere; presumably adds an overload taking plain scalars - TODO confirm
+KFR_HANDLE_SCALAR(exp2)
+KFR_HANDLE_SCALAR(exp10)
+KFR_HANDLE_SCALAR(log)
+KFR_HANDLE_SCALAR(log2)
+KFR_HANDLE_SCALAR(log10)
+KFR_HANDLE_SCALAR(logb)
+KFR_HANDLE_SCALAR(pow)
+KFR_HANDLE_SCALAR(root)
+KFR_HANDLE_SCALAR(cbrt)
+
+KFR_FN(exp) // macro defined elsewhere; presumably declares the fn_exp functor type that the public wrappers name as internal::fn_exp
+KFR_FN(exp2)
+KFR_FN(exp10)
+KFR_FN(log)
+KFR_FN(log2)
+KFR_FN(log10)
+KFR_FN(logb)
+KFR_FN(logn)
+KFR_FN(logm)
+KFR_FN(exp_fmadd)
+KFR_FN(log_fmadd)
+KFR_FN(pow)
+KFR_FN(root)
+KFR_FN(cbrt)
}
-using fn_exp10 = internal::in_log_exp<>::fn_exp10;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> exp10(const T1& x)
+KFR_INTRIN ftype<T1> exp(const T1& x) // e^x for a plain value; result type is ftype<T1> so integer arguments are not converted back to an integer
{
- return internal::in_log_exp<>::exp10(x);
+ return internal::exp(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_exp10, E1> exp10(E1&& x)
+KFR_INTRIN expr_func<internal::fn_exp, E1> exp(E1&& x) // expression form: an expr_func holding a default fn_exp and the forwarded argument
{
- return { fn_exp10(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_log = internal::in_log_exp<>::fn_log;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> log(const T1& x)
+KFR_INTRIN ftype<T1> exp2(const T1& x) // internal::exp2 for a plain value; result type is ftype<T1>
{
- return internal::in_log_exp<>::log(x);
+ return internal::exp2(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_log, E1> log(E1&& x)
+KFR_INTRIN expr_func<internal::fn_exp2, E1> exp2(E1&& x) // expression form of exp2
{
- return { fn_log(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_log2 = internal::in_log_exp<>::fn_log2;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> log2(const T1& x)
+KFR_INTRIN ftype<T1> exp10(const T1& x) // internal::exp10 for a plain value; result type is ftype<T1>
{
- return internal::in_log_exp<>::log2(x);
+ return internal::exp10(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_log2, E1> log2(E1&& x)
+KFR_INTRIN expr_func<internal::fn_exp10, E1> exp10(E1&& x) // expression form of exp10
{
- return { fn_log2(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_log10 = internal::in_log_exp<>::fn_log10;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> log10(const T1& x)
+KFR_INTRIN ftype<T1> log(const T1& x) // internal::log for a plain value; result type is ftype<T1>
{
- return internal::in_log_exp<>::log10(x);
+ return internal::log(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_log10, E1> log10(E1&& x)
+KFR_INTRIN expr_func<internal::fn_log, E1> log(E1&& x) // expression form of log
{
- return { fn_log10(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_logb = internal::in_log_exp<>::fn_logb;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> logb(const T1& x)
+KFR_INTRIN ftype<T1> log2(const T1& x) // internal::log2 for a plain value; result type is ftype<T1>
{
- return internal::in_log_exp<>::logb(x);
+ return internal::log2(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_logb, E1> logb(E1&& x)
+KFR_INTRIN expr_func<internal::fn_log2, E1> log2(E1&& x) // expression form of log2
{
- return { fn_logb(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_logn = internal::in_log_exp<>::fn_logn;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> logn(const T1& x)
+KFR_INTRIN ftype<T1> log10(const T1& x) // internal::log10 for a plain value; result type is ftype<T1>
{
- return internal::in_log_exp<>::logn(x);
+ return internal::log10(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_logn, E1> logn(E1&& x)
+KFR_INTRIN expr_func<internal::fn_log10, E1> log10(E1&& x) // expression form of log10
{
- return { fn_logn(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_logm = internal::in_log_exp<>::fn_logm;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> logm(const T1& x)
+KFR_INTRIN ftype<T1> logb(const T1& x) // internal::logb for a plain value; result type is ftype<T1>
{
- return internal::in_log_exp<>::logm(x);
+ return internal::logb(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_logm, E1> logm(E1&& x)
+KFR_INTRIN expr_func<internal::fn_logb, E1> logb(E1&& x) // expression form of logb
+{
+ return { {}, std::forward<E1>(x) };
+}
+
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INTRIN common_type<T1, T2> logn(const T1& x, const T2& y) // forwards both values to internal::logn (log(x) / log(y))
+{
+ return internal::logn(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INTRIN expr_func<internal::fn_logn, E1, E2> logn(E1&& x, E2&& y) // expression form: expr_func holding a default fn_logn and both forwarded arguments
+{
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) };
+}
+
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INTRIN common_type<T1, T2> logm(const T1& x, const T2& y) // forwards both values to internal::logm (log(x) * y)
{
- return { fn_logm(), std::forward<E1>(x) };
+ return internal::logm(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INTRIN expr_func<internal::fn_logm, E1, E2> logm(E1&& x, E2&& y) // expression form: expr_func holding a default fn_logm and both forwarded arguments
+{
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) };
}
-using fn_exp_fmadd = internal::in_log_exp<>::fn_exp_fmadd;
template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
-KFR_INLINE ftype<common_type<T1, T2, T3>> exp_fmadd(const T1& x, const T2& m, const T3& a)
+KFR_INTRIN common_type<T1, T2, T3> exp_fmadd(const T1& x, const T2& y, const T3& z) // NOTE(review): the removed overload wrapped the result type in ftype<>; confirm dropping it is intended
{
- return internal::in_log_exp<>::exp_fmadd(x, m, a);
+ return internal::exp_fmadd(x, y, z);
}
template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
-KFR_INLINE expr_func<fn_exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& m, E3&& a)
+KFR_INTRIN expr_func<internal::fn_exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& y, E3&& z) // expression form: default fn_exp_fmadd plus the three forwarded arguments
{
- return { fn_exp_fmadd(), std::forward<E1>(x), std::forward<E2>(m), std::forward<E3>(a) };
+ return { {}, std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) };
}
-using fn_log_fmadd = internal::in_log_exp<>::fn_log_fmadd;
+
template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
-KFR_INLINE ftype<common_type<T1, T2, T3>> log_fmadd(const T1& x, const T2& m, const T3& a)
+KFR_INTRIN common_type<T1, T2, T3> log_fmadd(const T1& x, const T2& y, const T3& z) // NOTE(review): the removed overload wrapped the result type in ftype<>; confirm dropping it is intended
{
- return internal::in_log_exp<>::log_fmadd(x, m, a);
+ return internal::log_fmadd(x, y, z);
}
template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
-KFR_INLINE expr_func<fn_log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& m, E3&& a)
+KFR_INTRIN expr_func<internal::fn_log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& y, E3&& z) // expression form: default fn_log_fmadd plus the three forwarded arguments
{
- return { fn_log_fmadd(), std::forward<E1>(x), std::forward<E2>(m), std::forward<E3>(a) };
+ return { {}, std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) };
}
-using fn_pow = internal::in_log_exp<>::fn_pow;
template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> pow(const T1& x, const T2& b)
+KFR_INTRIN common_type<T1, T2> pow(const T1& x, const T2& y) // x to the power y via internal::pow; NOTE(review): the removed overload returned ftype<common_type<T1, T2>> - confirm
{
- return internal::in_log_exp<>::pow(x, b);
+ return internal::pow(x, y);
}
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_pow, E1, E2> pow(E1&& x, E2&& b)
+KFR_INTRIN expr_func<internal::fn_pow, E1, E2> pow(E1&& x, E2&& y) // expression form: default fn_pow plus both forwarded arguments
{
- return { fn_pow(), std::forward<E1>(x), std::forward<E2>(b) };
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) };
}
-using fn_root = internal::in_log_exp<>::fn_root;
+
template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> root(const T1& x, const T2& b)
+KFR_INTRIN common_type<T1, T2> root(const T1& x, const T2& y) // y-th root of x via internal::root; NOTE(review): the removed overload returned ftype<common_type<T1, T2>> - confirm
{
- return internal::in_log_exp<>::root(x, b);
+ return internal::root(x, y);
}
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_root, E1, E2> root(E1&& x, E2&& b)
+KFR_INTRIN expr_func<internal::fn_root, E1, E2> root(E1&& x, E2&& y) // expression form: default fn_root plus both forwarded arguments
{
- return { fn_root(), std::forward<E1>(x), std::forward<E2>(b) };
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) };
}
-using fn_cbrt = internal::in_log_exp<>::fn_cbrt;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cbrt(const T1& x)
+KFR_INTRIN ftype<T1> cbrt(const T1& x) // cube root of a plain value; result type is ftype<T1> so integer arguments are not converted back to an integer
{
- return internal::in_log_exp<>::cbrt(x);
+ return internal::cbrt(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cbrt, E1> cbrt(E1&& x)
+KFR_INTRIN expr_func<internal::fn_cbrt, E1> cbrt(E1&& x) // expression form: an expr_func holding a default fn_cbrt and the forwarded argument
{
- return { fn_cbrt(), std::forward<E1>(x) };
-}
-}
+ return { {}, std::forward<E1>(x) };
}
-#pragma clang diagnostic pop
+
+
+
+}
diff --git a/include/kfr/base/logical.hpp b/include/kfr/base/logical.hpp
@@ -28,30 +28,22 @@
namespace kfr
{
+namespace internal
+{
+
template <size_t bits>
struct bitmask
{
using type = findinttype<0, (1ull << bits) - 1>;
+
bitmask(type val) : value(val) {}
+
template <typename Itype>
bitmask(Itype val) : value(static_cast<type>(val))
{
}
- type value;
-};
-
-namespace internal
-{
-template <cpu_t c = cpu_t::native>
-struct in_bittest : in_bittest<older(c)>
-{
- struct fn_bittestnone : fn_disabled
- {
- };
- struct fn_bittestall : fn_disabled
- {
- };
+ type value;
};
struct logical_and
@@ -61,6 +53,7 @@ struct logical_and
{
return x && y;
}
+
template <typename T>
T operator()(initialvalue<T>)
{
@@ -68,307 +61,190 @@ struct logical_and
}
};
-template <>
-struct in_bittest<cpu_t::common>
-{
- constexpr static cpu_t cpu = cpu_t::common;
-
- template <typename T, size_t N>
- KFR_SINTRIN bitmask<N> getmask(vec<T, N> x)
- {
- typename bitmask<N>::type val = 0;
- for (size_t i = 0; i < N; i++)
- {
- val |= (ubitcast(x[i]) >> (typebits<T>::bits - 1)) << i;
- }
- return val;
- }
+#if defined CID_ARCH_SSE41
+
+KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return _mm_testz_ps(*x, *y); } // SSE4.1: true when x & y has nothing set (ps/pd look at sign bits only)
+KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return _mm_testz_pd(*x, *y); }
+KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return _mm_testz_si128(*x, *y); } // integer forms look at all 128 bits
+KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return _mm_testz_si128(*x, *y); }
+KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return _mm_testz_si128(*x, *y); }
+KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return _mm_testz_si128(*x, *y); }
+KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return _mm_testz_si128(*x, *y); }
+KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return _mm_testz_si128(*x, *y); }
+KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return _mm_testz_si128(*x, *y); }
+KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return _mm_testz_si128(*x, *y); }
+KFR_SINTRIN bool bittestnone(f32sse x) { return _mm_testz_ps(*x, *x); } // one-argument form: x is tested against itself
+KFR_SINTRIN bool bittestnone(f64sse x) { return _mm_testz_pd(*x, *x); }
+KFR_SINTRIN bool bittestnone(u8sse x) { return _mm_testz_si128(*x, *x); }
+KFR_SINTRIN bool bittestnone(u16sse x) { return _mm_testz_si128(*x, *x); }
+KFR_SINTRIN bool bittestnone(u32sse x) { return _mm_testz_si128(*x, *x); }
+KFR_SINTRIN bool bittestnone(u64sse x) { return _mm_testz_si128(*x, *x); }
+KFR_SINTRIN bool bittestnone(i8sse x) { return _mm_testz_si128(*x, *x); }
+KFR_SINTRIN bool bittestnone(i16sse x) { return _mm_testz_si128(*x, *x); }
+KFR_SINTRIN bool bittestnone(i32sse x) { return _mm_testz_si128(*x, *x); }
+KFR_SINTRIN bool bittestnone(i64sse x) { return _mm_testz_si128(*x, *x); }
+KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return _mm_testc_ps(*x, *y); } // true when ~x & y has nothing set, i.e. x covers y
+KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return _mm_testc_pd(*x, *y); }
+KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return _mm_testc_si128(*x, *y); }
+KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return _mm_testc_si128(*x, *y); }
+KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return _mm_testc_si128(*x, *y); }
+KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return _mm_testc_si128(*x, *y); }
+KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return _mm_testc_si128(*x, *y); }
+KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return _mm_testc_si128(*x, *y); }
+KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return _mm_testc_si128(*x, *y); }
+KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return _mm_testc_si128(*x, *y); }
+KFR_SINTRIN bool bittestall(f32sse x) { return _mm_testc_ps(*x, *allonesvector(x)); } // one-argument form: x is tested against allonesvector(x)
+KFR_SINTRIN bool bittestall(f64sse x) { return _mm_testc_pd(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(u8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(u16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(u32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(u64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(i8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(i16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(i32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(i64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+
+#if defined CID_ARCH_AVX // 256-bit float/double overloads; these tests look at the sign bit of each lane
+KFR_SINTRIN bool bittestnone(f32avx x, f32avx y) { return _mm256_testz_ps(*x, *y); }
+KFR_SINTRIN bool bittestnone(f64avx x, f64avx y) { return _mm256_testz_pd(*x, *y); }
+KFR_SINTRIN bool bittestnone(f32avx x) { return _mm256_testz_ps(*x, *x); }
+KFR_SINTRIN bool bittestnone(f64avx x) { return _mm256_testz_pd(*x, *x); }
+KFR_SINTRIN bool bittestall(f32avx x, f32avx y) { return _mm256_testc_ps(*x, *y); } // was misspelled bittestnall, leaving bittestall without an AVX float overload
+KFR_SINTRIN bool bittestall(f64avx x, f64avx y) { return _mm256_testc_pd(*x, *y); }
+KFR_SINTRIN bool bittestall(f32avx x) { return _mm256_testc_ps(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(f64avx x) { return _mm256_testc_pd(*x, *allonesvector(x)); }
+#endif
- template <typename T, size_t N>
- KFR_SINTRIN bool bittestnone(vec<T, N> x)
- {
- return !getmask(x).value;
- }
- template <typename T, size_t N>
- KFR_SINTRIN bool bittestnone(vec<T, N> x, vec<T, N> y)
- {
- return bittestnone(x & y);
- }
+#if defined CID_ARCH_AVX2 // 256-bit integer overloads; the si256 tests look at all 256 bits
+KFR_SINTRIN bool bittestnone(u8avx x, u8avx y) { return _mm256_testz_si256(*x, *y); } // true when x & y is all zero
+KFR_SINTRIN bool bittestnone(u16avx x, u16avx y) { return _mm256_testz_si256(*x, *y); }
+KFR_SINTRIN bool bittestnone(u32avx x, u32avx y) { return _mm256_testz_si256(*x, *y); }
+KFR_SINTRIN bool bittestnone(u64avx x, u64avx y) { return _mm256_testz_si256(*x, *y); }
+KFR_SINTRIN bool bittestnone(i8avx x, i8avx y) { return _mm256_testz_si256(*x, *y); }
+KFR_SINTRIN bool bittestnone(i16avx x, i16avx y) { return _mm256_testz_si256(*x, *y); }
+KFR_SINTRIN bool bittestnone(i32avx x, i32avx y) { return _mm256_testz_si256(*x, *y); }
+KFR_SINTRIN bool bittestnone(i64avx x, i64avx y) { return _mm256_testz_si256(*x, *y); }
+
+KFR_SINTRIN bool bittestnone(u8avx x) { return _mm256_testz_si256(*x, *x); } // one-argument form: x is tested against itself
+KFR_SINTRIN bool bittestnone(u16avx x) { return _mm256_testz_si256(*x, *x); }
+KFR_SINTRIN bool bittestnone(u32avx x) { return _mm256_testz_si256(*x, *x); }
+KFR_SINTRIN bool bittestnone(u64avx x) { return _mm256_testz_si256(*x, *x); }
+KFR_SINTRIN bool bittestnone(i8avx x) { return _mm256_testz_si256(*x, *x); }
+KFR_SINTRIN bool bittestnone(i16avx x) { return _mm256_testz_si256(*x, *x); }
+KFR_SINTRIN bool bittestnone(i32avx x) { return _mm256_testz_si256(*x, *x); }
+KFR_SINTRIN bool bittestnone(i64avx x) { return _mm256_testz_si256(*x, *x); }
+
+KFR_SINTRIN bool bittestall(u8avx x, u8avx y) { return _mm256_testc_si256(*x, *y); } // true when ~x & y is all zero, i.e. x covers y
+KFR_SINTRIN bool bittestall(u16avx x, u16avx y) { return _mm256_testc_si256(*x, *y); }
+KFR_SINTRIN bool bittestall(u32avx x, u32avx y) { return _mm256_testc_si256(*x, *y); }
+KFR_SINTRIN bool bittestall(u64avx x, u64avx y) { return _mm256_testc_si256(*x, *y); }
+KFR_SINTRIN bool bittestall(i8avx x, i8avx y) { return _mm256_testc_si256(*x, *y); }
+KFR_SINTRIN bool bittestall(i16avx x, i16avx y) { return _mm256_testc_si256(*x, *y); }
+KFR_SINTRIN bool bittestall(i32avx x, i32avx y) { return _mm256_testc_si256(*x, *y); }
+KFR_SINTRIN bool bittestall(i64avx x, i64avx y) { return _mm256_testc_si256(*x, *y); }
+
+KFR_SINTRIN bool bittestall(u8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } // one-argument form: x is tested against allonesvector(x)
+KFR_SINTRIN bool bittestall(u16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(u32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(u64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(i8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(i16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(i32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+KFR_SINTRIN bool bittestall(i64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+#endif
- template <typename T, size_t N>
- KFR_SINTRIN bool bittestall(vec<T, N> x)
- {
- return !getmask(~x).value;
- }
- template <typename T, size_t N>
- KFR_SINTRIN bool bittestall(vec<T, N> x, vec<T, N> y)
+#elif defined CID_ARCH_SSE2
+
+KFR_SINTRIN bool bittestnone(f32sse x) { return !_mm_movemask_ps(*x); } // SSE2 fallback: true when no lane has its sign bit set
+KFR_SINTRIN bool bittestnone(f64sse x) { return !_mm_movemask_pd(*x); }
+KFR_SINTRIN bool bittestnone(u8sse x) { return !_mm_movemask_epi8(*x); } // integer forms use the byte-wise mask (top bit of every byte) for all element widths
+KFR_SINTRIN bool bittestnone(u16sse x) { return !_mm_movemask_epi8(*x); }
+KFR_SINTRIN bool bittestnone(u32sse x) { return !_mm_movemask_epi8(*x); }
+KFR_SINTRIN bool bittestnone(u64sse x) { return !_mm_movemask_epi8(*x); }
+KFR_SINTRIN bool bittestnone(i8sse x) { return !_mm_movemask_epi8(*x); }
+KFR_SINTRIN bool bittestnone(i16sse x) { return !_mm_movemask_epi8(*x); }
+KFR_SINTRIN bool bittestnone(i32sse x) { return !_mm_movemask_epi8(*x); }
+KFR_SINTRIN bool bittestnone(i64sse x) { return !_mm_movemask_epi8(*x); }
+
+KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return bittestnone(x & y); } // two-argument form: one-argument test applied to x & y
+KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return bittestnone(x & y); }
+KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return bittestnone(x & y); }
+KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return bittestnone(x & y); }
+KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return bittestnone(x & y); }
+KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return bittestnone(x & y); }
+KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return bittestnone(x & y); }
+KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return bittestnone(x & y); }
+KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return bittestnone(x & y); }
+KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return bittestnone(x & y); }
+
+KFR_SINTRIN bool bittestall(f32sse x) { return !_mm_movemask_ps(*~x); } // true when the complement of x has no sign bit set
+KFR_SINTRIN bool bittestall(f64sse x) { return !_mm_movemask_pd(*~x); }
+KFR_SINTRIN bool bittestall(u8sse x) { return !_mm_movemask_epi8(*~x); }
+KFR_SINTRIN bool bittestall(u16sse x) { return !_mm_movemask_epi8(*~x); }
+KFR_SINTRIN bool bittestall(u32sse x) { return !_mm_movemask_epi8(*~x); }
+KFR_SINTRIN bool bittestall(u64sse x) { return !_mm_movemask_epi8(*~x); }
+KFR_SINTRIN bool bittestall(i8sse x) { return !_mm_movemask_epi8(*~x); }
+KFR_SINTRIN bool bittestall(i16sse x) { return !_mm_movemask_epi8(*~x); }
+KFR_SINTRIN bool bittestall(i32sse x) { return !_mm_movemask_epi8(*~x); }
+KFR_SINTRIN bool bittestall(i64sse x) { return !_mm_movemask_epi8(*~x); }
+
+KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return bittestnone(~x & y); } // two-argument form: nothing of y may fall outside x
+KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return bittestnone(~x & y); }
+KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return bittestnone(~x & y); }
+KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return bittestnone(~x & y); }
+KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return bittestnone(~x & y); }
+KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return bittestnone(~x & y); }
+KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return bittestnone(~x & y); }
+KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return bittestnone(~x & y); }
+KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return bittestnone(~x & y); }
+KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return bittestnone(~x & y); }
+
+#else
+
+template <typename T, size_t N>
+KFR_SINTRIN bitmask<N> getmask(vec<T, N> x)
+{
+ typename bitmask<N>::type val = 0;
+ for (size_t i = 0; i < N; i++)
{
- return bittestnone(~x & y);
+ val |= (ubitcast(x[i]) >> (typebits<T>::bits - 1)) << i;
}
-
- KFR_SPEC_FN(in_bittest, bittestnone)
- KFR_SPEC_FN(in_bittest, bittestall)
-};
-
-#ifdef CID_ARCH_X86
-
-template <>
-struct in_bittest<cpu_t::sse2>
-{
- constexpr static cpu_t cpu = cpu_t::sse2;
-
- KFR_SINTRIN bitmask<4> getmask(f32sse x) { return bitmask<4>(_mm_movemask_pd(*x)); }
- KFR_SINTRIN bitmask<4> getmask(f64sse x) { return bitmask<4>(_mm_movemask_pd(*x)); }
- KFR_SINTRIN bitmask<16> getmask(u8sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<16> getmask(u16sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<16> getmask(u32sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<16> getmask(u64sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<16> getmask(i8sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<16> getmask(i16sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<16> getmask(i32sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<16> getmask(i64sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
-
- KFR_SINTRIN bool bittestnone(f32sse x) { return !_mm_movemask_ps(*x); }
- KFR_SINTRIN bool bittestnone(f64sse x) { return !_mm_movemask_pd(*x); }
- KFR_SINTRIN bool bittestnone(u8sse x) { return !_mm_movemask_epi8(*x); }
- KFR_SINTRIN bool bittestnone(u16sse x) { return !_mm_movemask_epi8(*x); }
- KFR_SINTRIN bool bittestnone(u32sse x) { return !_mm_movemask_epi8(*x); }
- KFR_SINTRIN bool bittestnone(u64sse x) { return !_mm_movemask_epi8(*x); }
- KFR_SINTRIN bool bittestnone(i8sse x) { return !_mm_movemask_epi8(*x); }
- KFR_SINTRIN bool bittestnone(i16sse x) { return !_mm_movemask_epi8(*x); }
- KFR_SINTRIN bool bittestnone(i32sse x) { return !_mm_movemask_epi8(*x); }
- KFR_SINTRIN bool bittestnone(i64sse x) { return !_mm_movemask_epi8(*x); }
-
- KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return bittestnone(x & y); }
- KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return bittestnone(x & y); }
- KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return bittestnone(x & y); }
- KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return bittestnone(x & y); }
- KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return bittestnone(x & y); }
- KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return bittestnone(x & y); }
- KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return bittestnone(x & y); }
- KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return bittestnone(x & y); }
- KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return bittestnone(x & y); }
- KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return bittestnone(x & y); }
-
- KFR_SINTRIN bool bittestall(f32sse x) { return !_mm_movemask_ps(*~x); }
- KFR_SINTRIN bool bittestall(f64sse x) { return !_mm_movemask_pd(*~x); }
- KFR_SINTRIN bool bittestall(u8sse x) { return !_mm_movemask_epi8(*~x); }
- KFR_SINTRIN bool bittestall(u16sse x) { return !_mm_movemask_epi8(*~x); }
- KFR_SINTRIN bool bittestall(u32sse x) { return !_mm_movemask_epi8(*~x); }
- KFR_SINTRIN bool bittestall(u64sse x) { return !_mm_movemask_epi8(*~x); }
- KFR_SINTRIN bool bittestall(i8sse x) { return !_mm_movemask_epi8(*~x); }
- KFR_SINTRIN bool bittestall(i16sse x) { return !_mm_movemask_epi8(*~x); }
- KFR_SINTRIN bool bittestall(i32sse x) { return !_mm_movemask_epi8(*~x); }
- KFR_SINTRIN bool bittestall(i64sse x) { return !_mm_movemask_epi8(*~x); }
-
- KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return bittestnone(~x & y); }
- KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return bittestnone(~x & y); }
- KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return bittestnone(~x & y); }
- KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return bittestnone(~x & y); }
- KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return bittestnone(~x & y); }
- KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return bittestnone(~x & y); }
- KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return bittestnone(~x & y); }
- KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return bittestnone(~x & y); }
- KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return bittestnone(~x & y); }
- KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return bittestnone(~x & y); }
-
- KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone)
- KFR_HANDLE_ALL_REDUCE(logical_and, bittestall)
- KFR_SPEC_FN(in_bittest, bittestnone)
- KFR_SPEC_FN(in_bittest, bittestall)
-};
-
-template <>
-struct in_bittest<cpu_t::sse41> : in_bittest<cpu_t::sse2>
-{
- constexpr static cpu_t cpu = cpu_t::sse41;
-
- KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return _mm_testz_ps(*x, *y); }
- KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return _mm_testz_pd(*x, *y); }
- KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return _mm_testz_si128(*x, *y); }
- KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return _mm_testz_si128(*x, *y); }
- KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return _mm_testz_si128(*x, *y); }
- KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return _mm_testz_si128(*x, *y); }
- KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return _mm_testz_si128(*x, *y); }
- KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return _mm_testz_si128(*x, *y); }
- KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return _mm_testz_si128(*x, *y); }
- KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return _mm_testz_si128(*x, *y); }
-
- KFR_SINTRIN bool bittestnone(f32sse x) { return _mm_testz_ps(*x, *x); }
- KFR_SINTRIN bool bittestnone(f64sse x) { return _mm_testz_pd(*x, *x); }
- KFR_SINTRIN bool bittestnone(u8sse x) { return _mm_testz_si128(*x, *x); }
- KFR_SINTRIN bool bittestnone(u16sse x) { return _mm_testz_si128(*x, *x); }
- KFR_SINTRIN bool bittestnone(u32sse x) { return _mm_testz_si128(*x, *x); }
- KFR_SINTRIN bool bittestnone(u64sse x) { return _mm_testz_si128(*x, *x); }
- KFR_SINTRIN bool bittestnone(i8sse x) { return _mm_testz_si128(*x, *x); }
- KFR_SINTRIN bool bittestnone(i16sse x) { return _mm_testz_si128(*x, *x); }
- KFR_SINTRIN bool bittestnone(i32sse x) { return _mm_testz_si128(*x, *x); }
- KFR_SINTRIN bool bittestnone(i64sse x) { return _mm_testz_si128(*x, *x); }
-
- KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return _mm_testc_ps(*x, *y); }
- KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return _mm_testc_pd(*x, *y); }
- KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return _mm_testc_si128(*x, *y); }
- KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return _mm_testc_si128(*x, *y); }
- KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return _mm_testc_si128(*x, *y); }
- KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return _mm_testc_si128(*x, *y); }
- KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return _mm_testc_si128(*x, *y); }
- KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return _mm_testc_si128(*x, *y); }
- KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return _mm_testc_si128(*x, *y); }
- KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return _mm_testc_si128(*x, *y); }
-
- KFR_SINTRIN bool bittestall(f32sse x) { return _mm_testc_ps(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(f64sse x) { return _mm_testc_pd(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(u8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(u16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(u32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(u64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(i8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(i16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(i32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(i64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
-
- KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone)
- KFR_HANDLE_ALL_REDUCE(logical_and, bittestall)
- KFR_SPEC_FN(in_bittest, bittestnone)
- KFR_SPEC_FN(in_bittest, bittestall)
-};
-
-template <>
-struct in_bittest<cpu_t::avx1> : in_bittest<cpu_t::sse41>
-{
- constexpr static cpu_t cpu = cpu_t::avx1;
- using in_bittest<cpu_t::sse41>::bittestnone;
- using in_bittest<cpu_t::sse41>::bittestall;
-
- KFR_SINTRIN bitmask<8> getmask(f32avx x) { return bitmask<8>(_mm256_movemask_pd(*x)); }
- KFR_SINTRIN bitmask<8> getmask(f64avx x) { return bitmask<8>(_mm256_movemask_pd(*x)); }
-
- KFR_SINTRIN bool bittestnone(f32avx x, f32avx y) { return _mm256_testz_ps(*x, *y); }
- KFR_SINTRIN bool bittestnone(f64avx x, f64avx y) { return _mm256_testz_pd(*x, *y); }
- KFR_SINTRIN bool bittestnone(f32avx x) { return _mm256_testz_ps(*x, *x); }
- KFR_SINTRIN bool bittestnone(f64avx x) { return _mm256_testz_pd(*x, *x); }
- KFR_SINTRIN bool bittestnall(f32avx x, f32avx y) { return _mm256_testc_ps(*x, *y); }
- KFR_SINTRIN bool bittestnall(f64avx x, f64avx y) { return _mm256_testc_pd(*x, *y); }
- KFR_SINTRIN bool bittestnall(f32avx x) { return _mm256_testc_ps(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestnall(f64avx x) { return _mm256_testc_pd(*x, *allonesvector(x)); }
-
- KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone)
- KFR_HANDLE_ALL_REDUCE(logical_and, bittestall)
- KFR_SPEC_FN(in_bittest, bittestnone)
- KFR_SPEC_FN(in_bittest, bittestall)
-};
-
-template <>
-struct in_bittest<cpu_t::avx2> : in_bittest<cpu_t::avx1>
-{
- constexpr static cpu_t cpu = cpu_t::avx2;
- using in_bittest<cpu_t::avx1>::bittestnone;
- using in_bittest<cpu_t::avx1>::bittestall;
-
- KFR_SINTRIN bitmask<32> getmask(u8avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<32> getmask(u16avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<32> getmask(u32avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<32> getmask(u64avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<32> getmask(i8avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<32> getmask(i16avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<32> getmask(i32avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
- KFR_SINTRIN bitmask<32> getmask(i64avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
-
- KFR_SINTRIN bool bittestnone(u8avx x, u8avx y) { return _mm256_testz_si256(*x, *y); }
- KFR_SINTRIN bool bittestnone(u16avx x, u16avx y) { return _mm256_testz_si256(*x, *y); }
- KFR_SINTRIN bool bittestnone(u32avx x, u32avx y) { return _mm256_testz_si256(*x, *y); }
- KFR_SINTRIN bool bittestnone(u64avx x, u64avx y) { return _mm256_testz_si256(*x, *y); }
- KFR_SINTRIN bool bittestnone(i8avx x, i8avx y) { return _mm256_testz_si256(*x, *y); }
- KFR_SINTRIN bool bittestnone(i16avx x, i16avx y) { return _mm256_testz_si256(*x, *y); }
- KFR_SINTRIN bool bittestnone(i32avx x, i32avx y) { return _mm256_testz_si256(*x, *y); }
- KFR_SINTRIN bool bittestnone(i64avx x, i64avx y) { return _mm256_testz_si256(*x, *y); }
-
- KFR_SINTRIN bool bittestnone(u8avx x) { return _mm256_testz_si256(*x, *x); }
- KFR_SINTRIN bool bittestnone(u16avx x) { return _mm256_testz_si256(*x, *x); }
- KFR_SINTRIN bool bittestnone(u32avx x) { return _mm256_testz_si256(*x, *x); }
- KFR_SINTRIN bool bittestnone(u64avx x) { return _mm256_testz_si256(*x, *x); }
- KFR_SINTRIN bool bittestnone(i8avx x) { return _mm256_testz_si256(*x, *x); }
- KFR_SINTRIN bool bittestnone(i16avx x) { return _mm256_testz_si256(*x, *x); }
- KFR_SINTRIN bool bittestnone(i32avx x) { return _mm256_testz_si256(*x, *x); }
- KFR_SINTRIN bool bittestnone(i64avx x) { return _mm256_testz_si256(*x, *x); }
-
- KFR_SINTRIN bool bittestall(u8avx x, u8avx y) { return _mm256_testc_si256(*x, *y); }
- KFR_SINTRIN bool bittestall(u16avx x, u16avx y) { return _mm256_testc_si256(*x, *y); }
- KFR_SINTRIN bool bittestall(u32avx x, u32avx y) { return _mm256_testc_si256(*x, *y); }
- KFR_SINTRIN bool bittestall(u64avx x, u64avx y) { return _mm256_testc_si256(*x, *y); }
- KFR_SINTRIN bool bittestall(i8avx x, i8avx y) { return _mm256_testc_si256(*x, *y); }
- KFR_SINTRIN bool bittestall(i16avx x, i16avx y) { return _mm256_testc_si256(*x, *y); }
- KFR_SINTRIN bool bittestall(i32avx x, i32avx y) { return _mm256_testc_si256(*x, *y); }
- KFR_SINTRIN bool bittestall(i64avx x, i64avx y) { return _mm256_testc_si256(*x, *y); }
-
- KFR_SINTRIN bool bittestall(u8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(u16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(u32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(u64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(i8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(i16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(i32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
- KFR_SINTRIN bool bittestall(i64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
-
- KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone)
- KFR_HANDLE_ALL_REDUCE(logical_and, bittestall)
- KFR_SPEC_FN(in_bittest, bittestnone)
- KFR_SPEC_FN(in_bittest, bittestall)
-};
-#endif
+ return val;
}
-namespace native
-{
-using fn_bittestnone = internal::in_bittest<>::fn_bittestnone;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> bittestnone(const T1& x)
+template <typename T, size_t N>
+KFR_SINTRIN bool bittestnone(vec<T, N> x)
{
- return internal::in_bittest<>::bittestnone(x);
+ return !getmask(x).value; // true when getmask(x) is empty, i.e. no element of x has its top bit set
}
-
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_bittestnone, E1> bittestnone(E1&& x)
+template <typename T, size_t N>
+KFR_SINTRIN bool bittestnone(vec<T, N> x, vec<T, N> y)
{
- return { fn_bittestnone(), std::forward<E1>(x) };
+ return bittestnone(x & y); // two-operand form: runs the unary test on x & y
}
-using fn_bittestall = internal::in_bittest<>::fn_bittestall;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> bittestall(const T1& x)
+template <typename T, size_t N>
+KFR_SINTRIN bool bittestall(vec<T, N> x)
{
- return internal::in_bittest<>::bittestall(x);
+ return !getmask(~x).value; // empty mask for ~x, i.e. every element of x has its top bit set
}
-
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_bittestall, E1> bittestall(E1&& x)
+template <typename T, size_t N>
+KFR_SINTRIN bool bittestall(vec<T, N> x, vec<T, N> y)
{
- return { fn_bittestall(), std::forward<E1>(x) };
+ return bittestnone(~x & y); // two-operand form: runs the unary bittestnone on ~x & y
}
-
-using fn_bittestnone = internal::in_bittest<>::fn_bittestnone;
-template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> bittestnone(const T1& x, const T2& y)
-{
- return internal::in_bittest<>::bittestnone(x, y);
+#endif
}
-template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_bittestnone, E1, E2> bittestnone(E1&& x, E2&& y)
-{
- return { fn_bittestnone(), std::forward<E1>(x), std::forward<E2>(y) };
-}
-using fn_bittestall = internal::in_bittest<>::fn_bittestall;
-template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> bittestall(const T1& x, const T2& y)
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN T1 bittestnone(const T1& x) // NOTE(review): the internal::bittestnone overloads visible in this file return bool, yet this wrapper returns T1 - confirm intended
{
- return internal::in_bittest<>::bittestall(x, y);
+ return internal::bittestnone(x); // public entry point: forwards to the internal implementation
}
-template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_bittestall, E1, E2> bittestall(E1&& x, E2&& y)
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN T1 bittestall(const T1& x) // NOTE(review): the internal::bittestall overloads visible in this file return bool, yet this wrapper returns T1 - confirm intended
{
- return { fn_bittestall(), std::forward<E1>(x), std::forward<E2>(y) };
-}
+ return internal::bittestall(x); // public entry point: forwards to the internal implementation
}
}
diff --git a/include/kfr/base/min_max.hpp b/include/kfr/base/min_max.hpp
@@ -27,383 +27,173 @@
#include "operators.hpp"
#include "select.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
namespace internal
{
-template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu>
-struct in_min_max : in_min_max<older(cpu), cc>
-{
- struct fn_min : in_min_max<older(cpu), cc>::fn_min, fn_disabled
- {
- };
- struct fn_max : in_min_max<older(cpu), cc>::fn_max, fn_disabled
- {
- };
-};
-
-template <cpu_t cc>
-struct in_min_max<cpu_t::common, cc> : in_select<cc>
-{
- constexpr static cpu_t cpu = cpu_t::common;
-
-private:
- using in_select<cc>::select;
-
-public:
- template <typename T>
- KFR_SINTRIN T min(initialvalue<T>)
- {
- return std::numeric_limits<T>::max();
- }
- template <typename T>
- KFR_SINTRIN T max(initialvalue<T>)
- {
- return std::numeric_limits<T>::min();
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> min(vec<T, N> x, vec<T, N> y)
- {
- return select(x < y, x, y);
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> max(vec<T, N> x, vec<T, N> y)
- {
- return select(x > y, x, y);
- }
-
- KFR_HANDLE_SCALAR(min)
- KFR_HANDLE_SCALAR(max)
- KFR_SPEC_FN(in_min_max, min)
- KFR_SPEC_FN(in_min_max, max)
-};
-
-#ifdef CID_ARCH_X86
-
-template <cpu_t cc>
-struct in_min_max<cpu_t::sse2, cc> : in_select<cc>
-{
- constexpr static cpu_t cpu = cpu_t::sse2;
-
-private:
- using in_select<cc>::select;
-
-public:
- template <typename T>
- KFR_SINTRIN T min(initialvalue<T>)
- {
- return std::numeric_limits<T>::max();
- }
- template <typename T>
- KFR_SINTRIN T max(initialvalue<T>)
- {
- return std::numeric_limits<T>::min();
- }
-
- KFR_CPU_INTRIN(sse2) f32sse min(f32sse x, f32sse y) { return _mm_min_ps(*x, *y); }
- KFR_CPU_INTRIN(sse2) f64sse min(f64sse x, f64sse y) { return _mm_min_pd(*x, *y); }
- KFR_CPU_INTRIN(sse2) i8sse min(i8sse x, i8sse y) { return select(x < y, x, y); }
- KFR_CPU_INTRIN(sse2) u16sse min(u16sse x, u16sse y) { return select(x < y, x, y); }
- KFR_CPU_INTRIN(sse2) i32sse min(i32sse x, i32sse y) { return select(x < y, x, y); }
- KFR_CPU_INTRIN(sse2) u32sse min(u32sse x, u32sse y) { return select(x < y, x, y); }
- KFR_CPU_INTRIN(sse2) u8sse min(u8sse x, u8sse y) { return _mm_min_epu8(*x, *y); }
- KFR_CPU_INTRIN(sse2) i16sse min(i16sse x, i16sse y) { return _mm_min_epi16(*x, *y); }
- KFR_CPU_INTRIN(sse2) i64sse min(i64sse x, i64sse y) { return select(x < y, x, y); }
- KFR_CPU_INTRIN(sse2) u64sse min(u64sse x, u64sse y) { return select(x < y, x, y); }
-
- KFR_CPU_INTRIN(sse2) f32sse max(f32sse x, f32sse y) { return _mm_max_ps(*x, *y); }
- KFR_CPU_INTRIN(sse2) f64sse max(f64sse x, f64sse y) { return _mm_max_pd(*x, *y); }
- KFR_CPU_INTRIN(sse2) i8sse max(i8sse x, i8sse y) { return select(x > y, x, y); }
- KFR_CPU_INTRIN(sse2) u16sse max(u16sse x, u16sse y) { return select(x > y, x, y); }
- KFR_CPU_INTRIN(sse2) i32sse max(i32sse x, i32sse y) { return select(x > y, x, y); }
- KFR_CPU_INTRIN(sse2) u32sse max(u32sse x, u32sse y) { return select(x > y, x, y); }
- KFR_CPU_INTRIN(sse2) u8sse max(u8sse x, u8sse y) { return _mm_max_epu8(*x, *y); }
- KFR_CPU_INTRIN(sse2) i16sse max(i16sse x, i16sse y) { return _mm_max_epi16(*x, *y); }
- KFR_CPU_INTRIN(sse2) i64sse max(i64sse x, i64sse y) { return select(x > y, x, y); }
- KFR_CPU_INTRIN(sse2) u64sse max(u64sse x, u64sse y) { return select(x > y, x, y); }
-
- KFR_HANDLE_ALL(min)
- KFR_HANDLE_ALL(max)
- KFR_HANDLE_SCALAR(min)
- KFR_HANDLE_SCALAR(max)
- KFR_SPEC_FN(in_min_max, min)
- KFR_SPEC_FN(in_min_max, max)
-};
-
-template <cpu_t cc>
-struct in_min_max<cpu_t::sse41, cc> : in_min_max<cpu_t::sse2>
-{
- constexpr static cpu_t cpu = cpu_t::sse41;
- using in_min_max<cpu_t::sse2>::min;
- using in_min_max<cpu_t::sse2>::max;
-
- KFR_CPU_INTRIN(sse41) i8sse min(i8sse x, i8sse y) { return _mm_min_epi8(*x, *y); }
- KFR_CPU_INTRIN(sse41) u16sse min(u16sse x, u16sse y) { return _mm_min_epu16(*x, *y); }
- KFR_CPU_INTRIN(sse41) i32sse min(i32sse x, i32sse y) { return _mm_min_epi32(*x, *y); }
- KFR_CPU_INTRIN(sse41) u32sse min(u32sse x, u32sse y) { return _mm_min_epu32(*x, *y); }
-
- KFR_CPU_INTRIN(sse41) i8sse max(i8sse x, i8sse y) { return _mm_max_epi8(*x, *y); }
- KFR_CPU_INTRIN(sse41) u16sse max(u16sse x, u16sse y) { return _mm_max_epu16(*x, *y); }
- KFR_CPU_INTRIN(sse41) i32sse max(i32sse x, i32sse y) { return _mm_max_epi32(*x, *y); }
- KFR_CPU_INTRIN(sse41) u32sse max(u32sse x, u32sse y) { return _mm_max_epu32(*x, *y); }
-
- KFR_HANDLE_ALL(min)
- KFR_HANDLE_ALL(max)
- KFR_HANDLE_SCALAR(min)
- KFR_HANDLE_SCALAR(max)
- KFR_SPEC_FN(in_min_max, min)
- KFR_SPEC_FN(in_min_max, max)
-};
-
-template <cpu_t cc>
-struct in_min_max<cpu_t::avx1, cc> : in_min_max<cpu_t::sse41>
-{
- constexpr static cpu_t cpu = cpu_t::avx1;
- using in_min_max<cpu_t::sse41>::min;
- using in_min_max<cpu_t::sse41>::max;
-
- KFR_CPU_INTRIN(avx) f32avx min(f32avx x, f32avx y) { return _mm256_min_ps(*x, *y); }
- KFR_CPU_INTRIN(avx) f64avx min(f64avx x, f64avx y) { return _mm256_min_pd(*x, *y); }
- KFR_CPU_INTRIN(avx) f32avx max(f32avx x, f32avx y) { return _mm256_max_ps(*x, *y); }
- KFR_CPU_INTRIN(avx) f64avx max(f64avx x, f64avx y) { return _mm256_max_pd(*x, *y); }
-
- KFR_HANDLE_ALL(min)
- KFR_HANDLE_ALL(max)
- KFR_HANDLE_SCALAR(min)
- KFR_HANDLE_SCALAR(max)
- KFR_SPEC_FN(in_min_max, min)
- KFR_SPEC_FN(in_min_max, max)
-};
-
-template <cpu_t cc>
-struct in_min_max<cpu_t::avx2, cc> : in_min_max<cpu_t::avx1>, in_select<cpu_t::avx2>
-{
- constexpr static cpu_t cpu = cpu_t::avx2;
-
-private:
- using in_select<cpu>::select;
-
-public:
- using in_min_max<cpu_t::avx1>::min;
- using in_min_max<cpu_t::avx1>::max;
-
- KFR_CPU_INTRIN(avx2) u8avx min(u8avx x, u8avx y) { return _mm256_min_epu8(*x, *y); }
- KFR_CPU_INTRIN(avx2) i16avx min(i16avx x, i16avx y) { return _mm256_min_epi16(*x, *y); }
- KFR_CPU_INTRIN(avx2) i8avx min(i8avx x, i8avx y) { return _mm256_min_epi8(*x, *y); }
- KFR_CPU_INTRIN(avx2) u16avx min(u16avx x, u16avx y) { return _mm256_min_epu16(*x, *y); }
- KFR_CPU_INTRIN(avx2) i32avx min(i32avx x, i32avx y) { return _mm256_min_epi32(*x, *y); }
- KFR_CPU_INTRIN(avx2) u32avx min(u32avx x, u32avx y) { return _mm256_min_epu32(*x, *y); }
-
- KFR_CPU_INTRIN(avx2) u8avx max(u8avx x, u8avx y) { return _mm256_max_epu8(*x, *y); }
- KFR_CPU_INTRIN(avx2) i16avx max(i16avx x, i16avx y) { return _mm256_max_epi16(*x, *y); }
- KFR_CPU_INTRIN(avx2) i8avx max(i8avx x, i8avx y) { return _mm256_max_epi8(*x, *y); }
- KFR_CPU_INTRIN(avx2) u16avx max(u16avx x, u16avx y) { return _mm256_max_epu16(*x, *y); }
- KFR_CPU_INTRIN(avx2) i32avx max(i32avx x, i32avx y) { return _mm256_max_epi32(*x, *y); }
- KFR_CPU_INTRIN(avx2) u32avx max(u32avx x, u32avx y) { return _mm256_max_epu32(*x, *y); }
+#if defined CID_ARCH_SSE2
+
+KFR_SINTRIN f32sse min(f32sse x, f32sse y) { return _mm_min_ps(*x, *y); } // element types below map directly onto a min/max intrinsic
+KFR_SINTRIN f64sse min(f64sse x, f64sse y) { return _mm_min_pd(*x, *y); }
+KFR_SINTRIN u8sse min(u8sse x, u8sse y) { return _mm_min_epu8(*x, *y); }
+KFR_SINTRIN i16sse min(i16sse x, i16sse y) { return _mm_min_epi16(*x, *y); }
+KFR_SINTRIN i64sse min(i64sse x, i64sse y) { return select(x < y, x, y); } // 64-bit lanes: compare + select instead of an intrinsic
+KFR_SINTRIN u64sse min(u64sse x, u64sse y) { return select(x < y, x, y); }
+
+KFR_SINTRIN f32sse max(f32sse x, f32sse y) { return _mm_max_ps(*x, *y); }
+KFR_SINTRIN f64sse max(f64sse x, f64sse y) { return _mm_max_pd(*x, *y); }
+KFR_SINTRIN u8sse max(u8sse x, u8sse y) { return _mm_max_epu8(*x, *y); }
+KFR_SINTRIN i16sse max(i16sse x, i16sse y) { return _mm_max_epi16(*x, *y); }
+KFR_SINTRIN i64sse max(i64sse x, i64sse y) { return select(x > y, x, y); } // 64-bit lanes: compare + select instead of an intrinsic
+KFR_SINTRIN u64sse max(u64sse x, u64sse y) { return select(x > y, x, y); }
+
+#if defined CID_ARCH_AVX2 // integer min/max on 256-bit registers
+KFR_SINTRIN u8avx min(u8avx x, u8avx y) { return _mm256_min_epu8(*x, *y); }
+KFR_SINTRIN i16avx min(i16avx x, i16avx y) { return _mm256_min_epi16(*x, *y); }
+KFR_SINTRIN i8avx min(i8avx x, i8avx y) { return _mm256_min_epi8(*x, *y); }
+KFR_SINTRIN u16avx min(u16avx x, u16avx y) { return _mm256_min_epu16(*x, *y); }
+KFR_SINTRIN i32avx min(i32avx x, i32avx y) { return _mm256_min_epi32(*x, *y); }
+KFR_SINTRIN u32avx min(u32avx x, u32avx y) { return _mm256_min_epu32(*x, *y); }
+
+KFR_SINTRIN u8avx max(u8avx x, u8avx y) { return _mm256_max_epu8(*x, *y); }
+KFR_SINTRIN i16avx max(i16avx x, i16avx y) { return _mm256_max_epi16(*x, *y); }
+KFR_SINTRIN i8avx max(i8avx x, i8avx y) { return _mm256_max_epi8(*x, *y); }
+KFR_SINTRIN u16avx max(u16avx x, u16avx y) { return _mm256_max_epu16(*x, *y); }
+KFR_SINTRIN i32avx max(i32avx x, i32avx y) { return _mm256_max_epi32(*x, *y); }
+KFR_SINTRIN u32avx max(u32avx x, u32avx y) { return _mm256_max_epu32(*x, *y); }
+
+KFR_SINTRIN i64avx min(i64avx x, i64avx y) { return select(x < y, x, y); } // 64-bit lanes: compare + select instead of an intrinsic
+KFR_SINTRIN u64avx min(u64avx x, u64avx y) { return select(x < y, x, y); }
+KFR_SINTRIN i64avx max(i64avx x, i64avx y) { return select(x > y, x, y); }
+KFR_SINTRIN u64avx max(u64avx x, u64avx y) { return select(x > y, x, y); }
+#endif
- KFR_CPU_INTRIN(avx2) i64avx min(i64avx x, i64avx y) { return select(x < y, x, y); }
- KFR_CPU_INTRIN(avx2) u64avx min(u64avx x, u64avx y) { return select(x < y, x, y); }
- KFR_CPU_INTRIN(avx2) i64avx max(i64avx x, i64avx y) { return select(x > y, x, y); }
- KFR_CPU_INTRIN(avx2) u64avx max(u64avx x, u64avx y) { return select(x > y, x, y); }
+#if defined CID_ARCH_AVX // floating-point min/max on 256-bit registers
+KFR_SINTRIN f32avx min(f32avx x, f32avx y) { return _mm256_min_ps(*x, *y); }
+KFR_SINTRIN f64avx min(f64avx x, f64avx y) { return _mm256_min_pd(*x, *y); }
+KFR_SINTRIN f32avx max(f32avx x, f32avx y) { return _mm256_max_ps(*x, *y); }
+KFR_SINTRIN f64avx max(f64avx x, f64avx y) { return _mm256_max_pd(*x, *y); }
+#endif
- KFR_HANDLE_ALL(min)
- KFR_HANDLE_ALL(max)
- KFR_HANDLE_SCALAR(min)
- KFR_HANDLE_SCALAR(max)
- KFR_SPEC_FN(in_min_max, min)
- KFR_SPEC_FN(in_min_max, max)
-};
+#if defined CID_ARCH_SSE41 // i8/u16/i32/u32 use dedicated intrinsics in this branch
+KFR_SINTRIN i8sse min(i8sse x, i8sse y) { return _mm_min_epi8(*x, *y); }
+KFR_SINTRIN u16sse min(u16sse x, u16sse y) { return _mm_min_epu16(*x, *y); }
+KFR_SINTRIN i32sse min(i32sse x, i32sse y) { return _mm_min_epi32(*x, *y); }
+KFR_SINTRIN u32sse min(u32sse x, u32sse y) { return _mm_min_epu32(*x, *y); }
+
+KFR_SINTRIN i8sse max(i8sse x, i8sse y) { return _mm_max_epi8(*x, *y); }
+KFR_SINTRIN u16sse max(u16sse x, u16sse y) { return _mm_max_epu16(*x, *y); }
+KFR_SINTRIN i32sse max(i32sse x, i32sse y) { return _mm_max_epi32(*x, *y); }
+KFR_SINTRIN u32sse max(u32sse x, u32sse y) { return _mm_max_epu32(*x, *y); }
+#else // without CID_ARCH_SSE41 the same four element types fall back to compare + select
+KFR_SINTRIN i8sse min(i8sse x, i8sse y) { return select(x < y, x, y); }
+KFR_SINTRIN u16sse min(u16sse x, u16sse y) { return select(x < y, x, y); }
+KFR_SINTRIN i32sse min(i32sse x, i32sse y) { return select(x < y, x, y); }
+KFR_SINTRIN u32sse min(u32sse x, u32sse y) { return select(x < y, x, y); }
+
+KFR_SINTRIN i8sse max(i8sse x, i8sse y) { return select(x > y, x, y); }
+KFR_SINTRIN u16sse max(u16sse x, u16sse y) { return select(x > y, x, y); }
+KFR_SINTRIN i32sse max(i32sse x, i32sse y) { return select(x > y, x, y); }
+KFR_SINTRIN u32sse max(u32sse x, u32sse y) { return select(x > y, x, y); }
#endif
-template <cpu_t cpu = cpu_t::native>
-struct in_minabs_maxabs
-{
-public:
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> minabs(vec<T, N> x, vec<T, N> y)
- {
- return in_min_max<cpu>::min(in_abs<cpu>::abs(x), in_abs<cpu>::abs(y));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> maxabs(vec<T, N> x, vec<T, N> y)
- {
- return in_min_max<cpu>::max(in_abs<cpu>::abs(x), in_abs<cpu>::abs(y));
- }
+KFR_HANDLE_ALL_SIZES_2(min)
+KFR_HANDLE_ALL_SIZES_2(max)
- KFR_HANDLE_ALL(minabs)
- KFR_HANDLE_ALL(maxabs)
- KFR_HANDLE_SCALAR(min)
- KFR_HANDLE_SCALAR(max)
- KFR_SPEC_FN(in_minabs_maxabs, minabs)
- KFR_SPEC_FN(in_minabs_maxabs, maxabs)
-};
+#else
-template <cpu_t cpu = cpu_t::native>
-struct in_clamp : in_min_max<cpu>
+// fallback: generic element-wise min/max built from compare + select, used when CID_ARCH_SSE2 is not defined
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> min(vec<T, N> x, vec<T, N> y)
{
- using in_min_max<cpu>::min;
- using in_min_max<cpu>::max;
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T minimum, T maximum)
- {
- return clamp(x, broadcast<N>(minimum), broadcast<N>(maximum));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T minimum, vec<T, N> maximum)
- {
- return clamp(x, broadcast<N>(minimum), maximum);
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> minimum, T maximum)
- {
- return clamp(x, minimum, broadcast<N>(maximum));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T maximum)
- {
- return clamp(x, broadcast<N>(maximum));
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> minimum, vec<T, N> maximum)
- {
- return max(minimum, min(x, maximum));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> maximum)
- {
- return max(zerovector<T, N>(), min(x, maximum));
- }
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> clampm1(vec<T, N> x, vec<T, N> minimum, vec<T, N> maximum)
- {
- return max(minimum, min(x, maximum - T(1)));
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> clampm1(vec<T, N> x, vec<T, N> maximum)
- {
- return max(zerovector<T, N>(), min(x, maximum - T(1)));
- }
- KFR_HANDLE_ALL(clamp)
- KFR_HANDLE_ALL(clampm1)
- KFR_HANDLE_SCALAR(min)
- KFR_HANDLE_SCALAR(max)
- KFR_SPEC_FN(in_clamp, clamp)
- KFR_SPEC_FN(in_clamp, clampm1)
-};
+ return select(x < y, x, y);
}
-
-namespace native
-{
-using fn_min = internal::in_min_max<>::fn_min;
-template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> min(const T1& x, const T2& y)
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> max(vec<T, N> x, vec<T, N> y)
{
- return internal::in_min_max<>::min(x, y);
+ return select(x > y, x, y); // per element: x where x > y, otherwise y
}
+#endif
-template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_min, E1, E2> min(E1&& x, E2&& y)
+template <typename T>
+KFR_SINTRIN T min(initialvalue<T>) // presumably the seed for a min-reduction (selected by the initialvalue<T> tag) - confirm against callers
{
- return { fn_min(), std::forward<E1>(x), std::forward<E2>(y) };
+ return std::numeric_limits<T>::max(); // largest finite T: no finite element compares greater than it
}
-using fn_max = internal::in_min_max<>::fn_max;
-template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> max(const T1& x, const T2& y)
+template <typename T>
+KFR_SINTRIN T max(initialvalue<T>) // presumably the seed for a max-reduction (selected by the initialvalue<T> tag) - confirm against callers
{
- return internal::in_min_max<>::max(x, y);
+ return std::numeric_limits<T>::lowest(); // lowest(), not min(): for floating-point T min() is the smallest positive normal value
}
-
-template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_max, E1, E2> max(E1&& x, E2&& y)
+template <typename T>
+KFR_SINTRIN T absmin(initialvalue<T>) // presumably the seed for an absmin-reduction - confirm against callers
{
- return { fn_max(), std::forward<E1>(x), std::forward<E2>(y) };
+ return std::numeric_limits<T>::max(); // largest finite T
}
-using fn_minabs = internal::in_minabs_maxabs<>::fn_minabs;
-template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> minabs(const T1& x, const T2& y)
+template <typename T>
+KFR_SINTRIN T absmax(initialvalue<T>) // presumably the seed for an absmax-reduction - confirm against callers
{
- return internal::in_minabs_maxabs<>::minabs(x, y);
+ return 0; // zero: no absolute value lies below it
}
-template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_minabs, E1, E2> minabs(E1&& x, E2&& y)
-{
- return { fn_minabs(), std::forward<E1>(x), std::forward<E2>(y) };
+KFR_HANDLE_SCALAR_2(min)
+KFR_FN(min)
+KFR_HANDLE_SCALAR_2(max)
+KFR_FN(max)
+KFR_HANDLE_SCALAR_2(absmin)
+KFR_FN(absmin)
+KFR_HANDLE_SCALAR_2(absmax)
+KFR_FN(absmax)
}
-using fn_maxabs = internal::in_minabs_maxabs<>::fn_maxabs;
+
template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> maxabs(const T1& x, const T2& y)
+KFR_INTRIN common_type<T1, T2> min(const T1& x, const T2& y) // numeric arguments: computed immediately, result typed as common_type<T1, T2>
{
- return internal::in_minabs_maxabs<>::maxabs(x, y);
+ return internal::min(x, y);
}
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_maxabs, E1, E2> maxabs(E1&& x, E2&& y)
-{
- return { fn_maxabs(), std::forward<E1>(x), std::forward<E2>(y) };
-}
-using fn_clamp = internal::in_clamp<>::fn_clamp;
-template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
-KFR_INLINE ftype<common_type<T1, T2, T3>> clamp(const T1& x, const T2& l, const T3& h)
+KFR_INTRIN expr_func<internal::fn_min, E1, E2> min(E1&& x, E2&& y)
{
- return internal::in_clamp<>::clamp(x, l, h);
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) };
}
-template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
-KFR_INLINE expr_func<fn_clamp, E1, E2, E3> clamp(E1&& x, E2&& l, E3&& h)
-{
- return { fn_clamp(), std::forward<E1>(x), std::forward<E2>(l), std::forward<E3>(h) };
-}
-using fn_clampm1 = internal::in_clamp<>::fn_clampm1;
-template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
-KFR_INLINE ftype<common_type<T1, T2, T3>> clampm1(const T1& x, const T2& l, const T3& h)
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INTRIN common_type<T1, T2> max(const T1& x, const T2& y)
{
- return internal::in_clamp<>::clampm1(x, l, h);
+ return internal::max(x, y);
}
-template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
-KFR_INLINE expr_func<fn_clampm1, E1, E2, E3> clampm1(E1&& x, E2&& l, E3&& h)
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INTRIN expr_func<internal::fn_max, E1, E2> max(E1&& x, E2&& y)
{
- return { fn_clampm1(), std::forward<E1>(x), std::forward<E2>(l), std::forward<E3>(h) };
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) };
}
-using fn_clamp = internal::in_clamp<>::fn_clamp;
template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> clamp(const T1& x, const T2& h)
+KFR_INTRIN common_type<T1, T2> absmin(const T1& x, const T2& y)
{
- return internal::in_clamp<>::clamp(x, h);
+ return internal::absmin(x, y);
}
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_clamp, E1, E2> clamp(E1&& x, E2&& h)
+KFR_INTRIN expr_func<internal::fn_absmin, E1, E2> absmin(E1&& x, E2&& y)
{
- return { fn_clamp(), std::forward<E1>(x), std::forward<E2>(h) };
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) };
}
-using fn_clampm1 = internal::in_clamp<>::fn_clampm1;
+
template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> clampm1(const T1& x, const T2& h)
+KFR_INTRIN common_type<T1, T2> absmax(const T1& x, const T2& y)
{
- return internal::in_clamp<>::clampm1(x, h);
+ return internal::absmax(x, y);
}
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_clampm1, E1, E2> clampm1(E1&& x, E2&& h)
+KFR_INTRIN expr_func<internal::fn_absmax, E1, E2> absmax(E1&& x, E2&& y)
{
- return { fn_clampm1(), std::forward<E1>(x), std::forward<E2>(h) };
-}
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) };
}
}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/base/modzerobessel.hpp b/include/kfr/base/modzerobessel.hpp
@@ -0,0 +1,113 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "function.hpp"
+#include "log_exp.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Wc99-extensions")
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <typename T> // series coefficients: bessel_coef<T>[i] == 1 / ((i + 2)!)^2 for i = 0..38, each converted to T
+constexpr T bessel_coef[] = { T(0.25),
+ T(0.027777777777777776236),
+ T(0.0017361111111111110147),
+ T(6.9444444444444444384e-005),
+ T(1.9290123456790123911e-006),
+ T(3.9367598891408417495e-008),
+ T(6.1511873267825652335e-010),
+ T(7.5940584281266239246e-012),
+ T(7.5940584281266233693e-014),
+ T(6.2760813455591932909e-016),
+ T(4.3583898233049949985e-018),
+ T(2.5789288895295827557e-020),
+ T(1.3157800456783586208e-022),
+ T(5.8479113141260384983e-025),
+ T(2.2843403570804837884e-027),
+ T(7.904291893012054025e-030),
+ T(2.4395962632753252792e-032),
+ T(6.75788438580422547e-035),
+ T(1.689471096451056426e-037),
+ T(3.8310002187098784929e-040),
+ T(7.9152897080782616517e-043),
+ T(1.4962740468957016443e-045),
+ T(2.5976979980828152196e-048),
+ T(4.1563167969325041577e-051),
+ T(6.1483976285983795968e-054),
+ T(8.434015951438105991e-057),
+ T(1.0757673407446563809e-059),
+ T(1.2791526049282476926e-062),
+ T(1.4212806721424974034e-065),
+ T(1.4789601166935457918e-068),
+ T(1.4442969889585408123e-071),
+ T(1.3262598613026086927e-074),
+ T(1.1472836170437790782e-077),
+ T(9.3655805472961564331e-081),
+ T(7.2265282000741942594e-084),
+ T(5.2786911614858977913e-087),
+ T(3.6556032974279072401e-090),
+ T(2.4034209713529963119e-093),
+ T(1.5021381070956226783e-096) };
+
+template <typename T, size_t N> // per-lane truncated power series: sum over k of (x/2)^(2k) / (k!)^2
+KFR_INLINE vec<T, N> modzerobessel(vec<T, N> x)
+{
+ const vec<T, N> x_2 = x * 0.5;
+ const vec<T, N> x_2_sqr = x_2 * x_2;
+ vec<T, N> num = x_2_sqr; // running power (x/2)^(2k); holds the k = 1 power before the loop
+ vec<T, N> result;
+ result = 1 + x_2_sqr; // k = 0 and k = 1 terms, whose coefficients are both 1
+
+ KFR_LOOP_UNROLL
+ for (size_t i = 0; i < (sizeof(T) == 4 ? 20 : 39); i++) // 20 table entries for 4-byte T, all 39 otherwise
+ {
+ result = fmadd((num *= x_2_sqr), bessel_coef<T>[i], result); // term k = i + 2; fmadd presumably computes a * b + c — confirm
+ }
+ return result;
+}
+
+KFR_HANDLE_SCALAR(modzerobessel)
+KFR_FN(modzerobessel)
+}
+
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // public overload, enabled only when is_numeric<T1> holds
+KFR_INTRIN T1 modzerobessel(const T1& x)
+{
+ return internal::modzerobessel(x); // forwards to the internal overload set; the result is returned as T1
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // public overload, enabled only for input expressions
+KFR_INTRIN expr_func<internal::fn_modzerobessel, E1> modzerobessel(E1&& x)
+{
+ return { {}, std::forward<E1>(x) }; // value-initialised fn_modzerobessel functor plus the perfectly-forwarded operand
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/operators.hpp b/include/kfr/base/operators.hpp
@@ -660,4 +660,40 @@ KFR_INLINE vec<T, N> negodd(const vec<T, N>& x)
{
return x ^ broadcast<N>(T(), -T());
}
+
+// KFR_EXPR_UNARY(fn, op): defines a unary 'operator op' template, enabled only when is_input_expression<A1> holds, returning bind_expression(fn(), a1).
+#define KFR_EXPR_UNARY(fn, op) \
+ template <typename A1, KFR_ENABLE_IF(is_input_expression<A1>::value)> \
+ KFR_INLINE auto operator op(A1&& a1)->decltype(bind_expression(fn(), std::forward<A1>(a1))) \
+ { \
+ return bind_expression(fn(), std::forward<A1>(a1)); \
+ }
+// KFR_EXPR_BINARY(fn, op): defines a binary 'operator op' template, enabled only when is_input_expressions<A1, A2> holds, returning bind_expression(fn(), a1, a2).
+#define KFR_EXPR_BINARY(fn, op) \
+ template <typename A1, typename A2, KFR_ENABLE_IF(is_input_expressions<A1, A2>::value)> \
+ KFR_INLINE auto operator op(A1&& a1, A2&& a2) \
+ ->decltype(bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2))) \
+ { \
+ return bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2)); \
+ }
+
+KFR_EXPR_UNARY(fn_neg, -)
+KFR_EXPR_UNARY(fn_bitwisenot, ~)
+
+KFR_EXPR_BINARY(fn_add, +)
+KFR_EXPR_BINARY(fn_sub, -)
+KFR_EXPR_BINARY(fn_mul, *)
+KFR_EXPR_BINARY(fn_div, /)
+KFR_EXPR_BINARY(fn_bitwiseand, &)
+KFR_EXPR_BINARY(fn_bitwiseor, |)
+KFR_EXPR_BINARY(fn_bitwisexor, ^)
+KFR_EXPR_BINARY(fn_shl, <<)
+KFR_EXPR_BINARY(fn_shr, >>)
+
+KFR_EXPR_BINARY(fn_equal, ==)
+KFR_EXPR_BINARY(fn_notequal, !=)
+KFR_EXPR_BINARY(fn_less, <)
+KFR_EXPR_BINARY(fn_greater, >)
+KFR_EXPR_BINARY(fn_lessorequal, <=)
+KFR_EXPR_BINARY(fn_greaterorequal, >=)
}
diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp
@@ -28,6 +28,9 @@
namespace kfr
{
+namespace internal
+{
+
#define KFR_mm_trunc_ps(V) _mm_round_ps((V), _MM_FROUND_TRUNC)
#define KFR_mm_roundnearest_ps(V) _mm_round_ps((V), _MM_FROUND_NINT)
#define KFR_mm_trunc_pd(V) _mm_round_pd((V), _MM_FROUND_TRUNC)
@@ -48,285 +51,211 @@ namespace kfr
#define KFR_mm256_trunc_pd(V) _mm256_round_pd((V), _MM_FROUND_TRUNC)
#define KFR_mm256_roundnearest_pd(V) _mm256_round_pd((V), _MM_FROUND_NINT)
-namespace internal
-{
+#if defined CID_ARCH_SSE41
-template <cpu_t c = cpu_t::native>
-struct in_round : in_round<older(c)>
-{
- struct fn_floor : in_round<older(c)>::fn_floor, fn_disabled
- {
- };
- struct fn_ceil : in_round<older(c)>::fn_ceil, fn_disabled
- {
- };
- struct fn_round : in_round<older(c)>::fn_round, fn_disabled
- {
- };
- struct fn_trunc : in_round<older(c)>::fn_trunc, fn_disabled
- {
- };
- struct fn_fract : in_round<older(c)>::fn_fract, fn_disabled
- {
- };
-};
+KFR_SINTRIN f32sse floor(f32sse value) { return _mm_floor_ps(*value); }
+KFR_SINTRIN f32sse ceil(f32sse value) { return _mm_ceil_ps(*value); }
+KFR_SINTRIN f32sse trunc(f32sse value) { return KFR_mm_trunc_ps(*value); }
+KFR_SINTRIN f32sse round(f32sse value) { return KFR_mm_roundnearest_ps(*value); }
+KFR_SINTRIN f64sse floor(f64sse value) { return _mm_floor_pd(*value); }
+KFR_SINTRIN f64sse ceil(f64sse value) { return _mm_ceil_pd(*value); }
+KFR_SINTRIN f64sse trunc(f64sse value) { return KFR_mm_trunc_pd(*value); }
+KFR_SINTRIN f64sse round(f64sse value) { return KFR_mm_roundnearest_pd(*value); }
+KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); }
+KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); }
-template <>
-struct in_round<cpu_t::common>
-{
- constexpr static cpu_t cpu = cpu_t::common;
+#if defined CID_ARCH_AVX
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> floor(vec<T, N> value)
- {
- return value;
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> ceil(vec<T, N> value)
- {
- return value;
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> trunc(vec<T, N> value)
- {
- return value;
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> round(vec<T, N> value)
- {
- return value;
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> fract(vec<T, N>)
- {
- return T();
- }
+KFR_SINTRIN f32avx floor(f32avx value) { return _mm256_floor_ps(*value); }
+KFR_SINTRIN f32avx ceil(f32avx value) { return _mm256_ceil_ps(*value); }
+KFR_SINTRIN f32avx trunc(f32avx value) { return KFR_mm256_trunc_ps(*value); }
+KFR_SINTRIN f32avx round(f32avx value) { return KFR_mm256_roundnearest_ps(*value); }
+KFR_SINTRIN f64avx floor(f64avx value) { return _mm256_floor_pd(*value); }
+KFR_SINTRIN f64avx ceil(f64avx value) { return _mm256_ceil_pd(*value); }
+KFR_SINTRIN f64avx trunc(f64avx value) { return KFR_mm256_trunc_pd(*value); }
+KFR_SINTRIN f64avx round(f64avx value) { return KFR_mm256_roundnearest_pd(*value); }
+KFR_SINTRIN f32avx fract(f32avx x) { return x - floor(x); }
+KFR_SINTRIN f64avx fract(f64avx x) { return x - floor(x); }
+#endif
- template <size_t N>
- KFR_SINTRIN vec<f32, N> floor(vec<f32, N> x)
- {
- vec<f32, N> t = cast<f32>(cast<i32>(x));
- return t - (bitcast<f32>(x < t) & 1.f);
- }
- template <size_t N>
- KFR_SINTRIN vec<f64, N> floor(vec<f64, N> x)
- {
- vec<f64, N> t = cast<f64>(cast<i64>(x));
- return t - (bitcast<f64>(x < t) & 1.0);
- }
- template <size_t N>
- KFR_SINTRIN vec<f32, N> ceil(vec<f32, N> x)
- {
- vec<f32, N> t = cast<f32>(cast<i32>(x));
- return t + (bitcast<f32>(x > t) & 1.f);
- }
- template <size_t N>
- KFR_SINTRIN vec<f64, N> ceil(vec<f64, N> x)
- {
- vec<f64, N> t = cast<f64>(cast<i64>(x));
- return t + (bitcast<f64>(x > t) & 1.0);
- }
- template <size_t N>
- KFR_SINTRIN vec<f32, N> round(vec<f32, N> x)
- {
- return cast<f32>(cast<i32>(x + mulsign(broadcast<N>(0.5f), x)));
- }
- template <size_t N>
- KFR_SINTRIN vec<f64, N> round(vec<f64, N> x)
- {
- return cast<f64>(cast<i64>(x + mulsign(broadcast<N>(0.5), x)));
- }
- template <size_t N>
- KFR_SINTRIN vec<f32, N> trunc(vec<f32, N> x)
- {
- return cast<f32>(cast<i32>(x));
- }
- template <size_t N>
- KFR_SINTRIN vec<f64, N> trunc(vec<f64, N> x)
- {
- return cast<f64>(cast<i64>(x));
- }
- template <size_t N>
- KFR_SINTRIN vec<f32, N> fract(vec<f32, N> x)
- {
- return x - floor(x);
- }
- template <size_t N>
- KFR_SINTRIN vec<f64, N> fract(vec<f64, N> x)
- {
- return x - floor(x);
- }
+KFR_HANDLE_ALL_SIZES_1(floor)
+KFR_HANDLE_ALL_SIZES_1(ceil)
+KFR_HANDLE_ALL_SIZES_1(round)
+KFR_HANDLE_ALL_SIZES_1(trunc)
+KFR_HANDLE_ALL_SIZES_1(fract)
- KFR_HANDLE_SCALAR(floor)
- KFR_HANDLE_SCALAR(ceil)
- KFR_HANDLE_SCALAR(round)
- KFR_HANDLE_SCALAR(trunc)
- KFR_HANDLE_SCALAR(fract)
- KFR_SPEC_FN(in_round, floor)
- KFR_SPEC_FN(in_round, ceil)
- KFR_SPEC_FN(in_round, round)
- KFR_SPEC_FN(in_round, trunc)
- KFR_SPEC_FN(in_round, fract)
-};
+#else
-#ifdef CID_ARCH_X86
+// fallback
-template <>
-struct in_round<cpu_t::sse41> : in_round<cpu_t::common>
+template <size_t N>
+KFR_SINTRIN vec<f32, N> floor(vec<f32, N> x)
{
- constexpr static cpu_t cpu = cpu_t::sse41;
-
- KFR_SINTRIN f32sse floor(f32sse value) { return _mm_floor_ps(*value); }
- KFR_SINTRIN f32sse ceil(f32sse value) { return _mm_ceil_ps(*value); }
- KFR_SINTRIN f32sse trunc(f32sse value) { return KFR_mm_trunc_ps(*value); }
- KFR_SINTRIN f32sse round(f32sse value) { return KFR_mm_roundnearest_ps(*value); }
- KFR_SINTRIN f64sse floor(f64sse value) { return _mm_floor_pd(*value); }
- KFR_SINTRIN f64sse ceil(f64sse value) { return _mm_ceil_pd(*value); }
- KFR_SINTRIN f64sse trunc(f64sse value) { return KFR_mm_trunc_pd(*value); }
- KFR_SINTRIN f64sse round(f64sse value) { return KFR_mm_roundnearest_pd(*value); }
- KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); }
- KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); }
-
- KFR_HANDLE_ALL(floor)
- KFR_HANDLE_ALL(ceil)
- KFR_HANDLE_ALL(round)
- KFR_HANDLE_ALL(trunc)
- KFR_HANDLE_ALL(fract)
- KFR_HANDLE_SCALAR(floor)
- KFR_HANDLE_SCALAR(ceil)
- KFR_HANDLE_SCALAR(round)
- KFR_HANDLE_SCALAR(trunc)
- KFR_HANDLE_SCALAR(fract)
- KFR_SPEC_FN(in_round, floor)
- KFR_SPEC_FN(in_round, ceil)
- KFR_SPEC_FN(in_round, round)
- KFR_SPEC_FN(in_round, trunc)
- KFR_SPEC_FN(in_round, fract)
-};
-
-template <>
-struct in_round<cpu_t::avx1> : in_round<cpu_t::sse41>
+ vec<f32, N> t = cast<f32>(cast<i32>(x));
+ return t - (bitcast<f32>(x < t) & 1.f);
+}
+template <size_t N>
+KFR_SINTRIN vec<f64, N> floor(vec<f64, N> x)
{
- constexpr static cpu_t cpu = cpu_t::avx1;
- using in_round<cpu_t::sse41>::floor;
- using in_round<cpu_t::sse41>::ceil;
- using in_round<cpu_t::sse41>::trunc;
- using in_round<cpu_t::sse41>::round;
- using in_round<cpu_t::sse41>::fract;
-
- KFR_SINTRIN f32avx floor(f32avx value) { return _mm256_floor_ps(*value); }
- KFR_SINTRIN f32avx ceil(f32avx value) { return _mm256_ceil_ps(*value); }
- KFR_SINTRIN f32avx trunc(f32avx value) { return KFR_mm256_trunc_ps(*value); }
- KFR_SINTRIN f32avx round(f32avx value) { return KFR_mm256_roundnearest_ps(*value); }
- KFR_SINTRIN f64avx floor(f64avx value) { return _mm256_floor_pd(*value); }
- KFR_SINTRIN f64avx ceil(f64avx value) { return _mm256_ceil_pd(*value); }
- KFR_SINTRIN f64avx trunc(f64avx value) { return KFR_mm256_trunc_pd(*value); }
- KFR_SINTRIN f64avx round(f64avx value) { return KFR_mm256_roundnearest_pd(*value); }
- KFR_SINTRIN f32avx fract(f32avx x) { return x - floor(x); }
- KFR_SINTRIN f64avx fract(f64avx x) { return x - floor(x); }
-
- KFR_HANDLE_ALL(floor)
- KFR_HANDLE_ALL(ceil)
- KFR_HANDLE_ALL(round)
- KFR_HANDLE_ALL(trunc)
- KFR_HANDLE_ALL(fract)
- KFR_HANDLE_SCALAR(floor)
- KFR_HANDLE_SCALAR(ceil)
- KFR_HANDLE_SCALAR(round)
- KFR_HANDLE_SCALAR(trunc)
- KFR_HANDLE_SCALAR(fract)
- KFR_SPEC_FN(in_round, floor)
- KFR_SPEC_FN(in_round, ceil)
- KFR_SPEC_FN(in_round, round)
- KFR_SPEC_FN(in_round, trunc)
- KFR_SPEC_FN(in_round, fract)
-};
+ vec<f64, N> t = cast<f64>(cast<i64>(x));
+ return t - (bitcast<f64>(x < t) & 1.0);
+}
+template <size_t N> // fallback ceil for f32 lanes, used when the SSE4.1 branch above is not compiled
+KFR_SINTRIN vec<f32, N> ceil(vec<f32, N> x)
+{
+ vec<f32, N> t = cast<f32>(cast<i32>(x)); // round-trip through i32; NOTE(review): presumably truncates toward zero and needs x within i32 range
+ return t + (bitcast<f32>(x > t) & 1.f); // the comparison mask ANDed with the bits of 1.f adds 1 only in lanes where x > t
+}
+template <size_t N> // fallback ceil for f64 lanes, used when the SSE4.1 branch above is not compiled
+KFR_SINTRIN vec<f64, N> ceil(vec<f64, N> x)
+{
+ vec<f64, N> t = cast<f64>(cast<i64>(x)); // round-trip through i64; NOTE(review): presumably truncates toward zero and needs x within i64 range
+ return t + (bitcast<f64>(x > t) & 1.0); // the comparison mask ANDed with the bits of 1.0 adds 1 only in lanes where x > t
+}
+template <size_t N> // fallback round for f32 lanes
+KFR_SINTRIN vec<f32, N> round(vec<f32, N> x)
+{
+ return cast<f32>(cast<i32>(x + mulsign(broadcast<N>(0.5f), x))); // adds 0.5 via mulsign (presumably signed like x — confirm), then converts through i32
+}
+template <size_t N> // fallback round for f64 lanes
+KFR_SINTRIN vec<f64, N> round(vec<f64, N> x)
+{
+ return cast<f64>(cast<i64>(x + mulsign(broadcast<N>(0.5), x))); // adds 0.5 via mulsign (presumably signed like x — confirm), then converts through i64
+}
+template <size_t N> // fallback trunc for f32 lanes
+KFR_SINTRIN vec<f32, N> trunc(vec<f32, N> x)
+{
+ return cast<f32>(cast<i32>(x)); // integer round-trip; NOTE(review): values outside the i32 range presumably do not survive it
+}
+template <size_t N> // fallback trunc for f64 lanes
+KFR_SINTRIN vec<f64, N> trunc(vec<f64, N> x)
+{
+ return cast<f64>(cast<i64>(x)); // integer round-trip; NOTE(review): values outside the i64 range presumably do not survive it
+}
+template <size_t N> // fallback fract for f32 lanes
+KFR_SINTRIN vec<f32, N> fract(vec<f32, N> x)
+{
+ return x - floor(x); // difference between x and the fallback floor defined earlier in this branch
+}
+template <size_t N> // fallback fract for f64 lanes
+KFR_SINTRIN vec<f64, N> fract(vec<f64, N> x)
+{
+ return x - floor(x); // difference between x and the fallback floor defined earlier in this branch
+}
+#endif
-#undef KFR_mm_trunc_ps
-#undef KFR_mm_roundnearest_ps
-#undef KFR_mm_trunc_pd
-#undef KFR_mm_roundnearest_pd
-#undef KFR_mm_trunc_ss
-#undef KFR_mm_roundnearest_ss
-#undef KFR_mm_trunc_sd
-#undef KFR_mm_roundnearest_sd
-#undef KFR_mm_floor_ss
-#undef KFR_mm_floor_sd
-#undef KFR_mm_ceil_ss
-#undef KFR_mm_ceil_sd
-#undef KFR_mm256_trunc_ps
-#undef KFR_mm256_roundnearest_ps
-#undef KFR_mm256_trunc_pd
-#undef KFR_mm256_roundnearest_pd
+// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+// KFR_SINTRIN vec<T, N> floor(vec<T, N> value)
+//{
+// return value;
+//}
+// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+// KFR_SINTRIN vec<T, N> ceil(vec<T, N> value)
+//{
+// return value;
+//}
+// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+// KFR_SINTRIN vec<T, N> trunc(vec<T, N> value)
+//{
+// return value;
+//}
+// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+// KFR_SINTRIN vec<T, N> round(vec<T, N> value)
+//{
+// return value;
+//}
+// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+// KFR_SINTRIN vec<T, N> fract(vec<T, N>)
+//{
+// return T(0);
+//}
-#endif
+KFR_HANDLE_SCALAR_1(floor)
+KFR_HANDLE_SCALAR_1(ceil)
+KFR_HANDLE_SCALAR_1(round)
+KFR_HANDLE_SCALAR_1(trunc)
+KFR_HANDLE_SCALAR_1(fract)
+KFR_FN(floor)
+KFR_FN(ceil)
+KFR_FN(round)
+KFR_FN(trunc)
+KFR_FN(fract)
}
-namespace native
-{
-using fn_floor = internal::in_round<>::fn_floor;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> floor(const T1& x)
{
- return internal::in_round<>::floor(x);
+ return internal::floor(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_floor, E1> floor(E1&& x)
+KFR_INTRIN expr_func<internal::fn_floor, E1> floor(E1&& x)
{
- return { fn_floor(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_ceil = internal::in_round<>::fn_ceil;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> ceil(const T1& x)
{
- return internal::in_round<>::ceil(x);
+ return internal::ceil(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_ceil, E1> ceil(E1&& x)
+KFR_INTRIN expr_func<internal::fn_ceil, E1> ceil(E1&& x)
{
- return { fn_ceil(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_round = internal::in_round<>::fn_round;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> round(const T1& x)
{
- return internal::in_round<>::round(x);
+ return internal::round(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_round, E1> round(E1&& x)
+KFR_INTRIN expr_func<internal::fn_round, E1> round(E1&& x)
{
- return { fn_round(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_trunc = internal::in_round<>::fn_trunc;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> trunc(const T1& x)
{
- return internal::in_round<>::trunc(x);
+ return internal::trunc(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_trunc, E1> trunc(E1&& x)
+KFR_INTRIN expr_func<internal::fn_trunc, E1> trunc(E1&& x)
{
- return { fn_trunc(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) };
}
-using fn_fract = internal::in_round<>::fn_fract;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> fract(const T1& x)
{
- return internal::in_round<>::fract(x);
+ return internal::fract(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_fract, E1> fract(E1&& x)
+KFR_INTRIN expr_func<internal::fn_fract, E1> fract(E1&& x)
{
- return { fn_fract(), std::forward<E1>(x) };
-}
+ return { {}, std::forward<E1>(x) };
}
}
+
+#undef KFR_mm_trunc_ps
+#undef KFR_mm_roundnearest_ps
+#undef KFR_mm_trunc_pd
+#undef KFR_mm_roundnearest_pd
+#undef KFR_mm_trunc_ss
+#undef KFR_mm_roundnearest_ss
+#undef KFR_mm_trunc_sd
+#undef KFR_mm_roundnearest_sd
+#undef KFR_mm_floor_ss
+#undef KFR_mm_floor_sd
+#undef KFR_mm_ceil_ss
+#undef KFR_mm_ceil_sd
+#undef KFR_mm256_trunc_ps
+#undef KFR_mm256_roundnearest_ps
+#undef KFR_mm256_trunc_pd
+#undef KFR_mm256_roundnearest_pd
diff --git a/include/kfr/base/saturation.hpp b/include/kfr/base/saturation.hpp
@@ -25,181 +25,125 @@
#include "function.hpp"
#include "select.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
namespace internal
{
-
-template <cpu_t c = cpu_t::native, cpu_t cc = c>
-struct in_saturated : in_saturated<older(c), cc>
+template <typename T, size_t N> // branch-free saturating addition helper for signed integer lanes of type T
+KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b)
{
- struct fn_satadd : in_saturated<older(c), cc>::fn_satadd, fn_disabled
- {
- };
- struct fn_satsub : in_saturated<older(c), cc>::fn_satsub, fn_disabled
- {
- };
-};
-
-template <cpu_t cc>
-struct in_saturated<cpu_t::common, cc> : in_select<cc>
+ constexpr size_t shift = typebits<T>::bits - 1; // sign-bit index of T itself (was hard-coded to i32, wrong for i64/i16/i8 lanes)
+ const vec<T, N> sum = a + b;
+ a = (a >> shift) + allonesvector(a); // NOTE(review): meant to become the per-lane saturation bound; verify this yields max/min of T for signed T
+
+ return select(((a ^ b) | ~(b ^ sum)) >= 0, a, sum); // sign bit clear => reassigned a and b agree in sign while sum and b differ: take a, else sum
+}
+template <typename T, size_t N> // branch-free saturating subtraction helper for signed integer lanes of type T
+KFR_SINTRIN vec<T, N> saturated_signed_sub(vec<T, N> a, vec<T, N> b)
{
- constexpr static cpu_t cpu = cpu_t::common;
-
- template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)>
- KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b)
- {
- return saturated_signed_add(a, b);
- }
- template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)>
- KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b)
- {
- return saturated_unsigned_add(a, b);
- }
-
- template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)>
- KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b)
- {
- return saturated_signed_sub(a, b);
- }
- template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)>
- KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b)
- {
- return saturated_unsigned_sub(a, b);
- }
- KFR_SPEC_FN(in_saturated, satadd)
- KFR_SPEC_FN(in_saturated, satsub)
-
-protected:
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b)
- {
- constexpr size_t shift = typebits<i32>::bits - 1;
- const vec<T, N> sum = a + b;
- a = (a >> shift) + allonesvector(a);
-
- return select(((a ^ b) | ~(b ^ sum)) >= 0, a, sum);
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> saturated_signed_sub(vec<T, N> a, vec<T, N> b)
- {
- constexpr size_t shift = typebits<i32>::bits - 1;
- const vec<T, N> diff = a - b;
- a = (a >> shift) + allonesvector(a);
-
- return select(((a ^ b) & (a ^ diff)) < 0, a, diff);
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> saturated_unsigned_add(vec<T, N> a, vec<T, N> b)
- {
- constexpr vec<T, N> t = allonesvector(a);
- return select(a > t - b, t, a + b);
- }
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> saturated_unsigned_sub(vec<T, N> a, vec<T, N> b)
- {
- return select(a < b, zerovector(a), a - b);
- }
-};
-
-#ifdef CID_ARCH_X86
-
-template <cpu_t cc>
-struct in_saturated<cpu_t::sse2, cc> : in_saturated<cpu_t::common>, in_select<cc>
+ constexpr size_t shift = typebits<T>::bits - 1; // sign-bit index of T itself (was hard-coded to i32, wrong for i64/i16/i8 lanes)
+ const vec<T, N> diff = a - b;
+ a = (a >> shift) + allonesvector(a); // NOTE(review): meant to become the per-lane saturation bound; verify this yields max/min of T for signed T
+
+ return select(((a ^ b) & (a ^ diff)) < 0, a, diff); // sign bit set => reassigned a differs in sign from both b and diff: take a, else diff
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> saturated_unsigned_add(vec<T, N> a, vec<T, N> b)
{
- constexpr static cpu_t cpu = cpu_t::sse2;
-
-private:
- using in_select<cc>::select;
-
-public:
- KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); }
- KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); }
- KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); }
- KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); }
-
- KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); }
- KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); }
- KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); }
- KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); }
-
- KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); }
- KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); }
- KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); }
- KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); }
-
- KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); }
- KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); }
- KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); }
- KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); }
-
- KFR_HANDLE_ALL(satadd)
- KFR_HANDLE_ALL(satsub)
- KFR_HANDLE_SCALAR(satadd)
- KFR_HANDLE_SCALAR(satsub)
- KFR_SPEC_FN(in_saturated, satadd)
- KFR_SPEC_FN(in_saturated, satsub)
-};
-
-template <cpu_t cc>
-struct in_saturated<cpu_t::avx2, cc> : in_saturated<cpu_t::sse2, cc>
+ const vec<T, N> t = allonesvector(a);
+ return select(a > t - b, t, a + b);
+}
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> saturated_unsigned_sub(vec<T, N> a, vec<T, N> b)
{
- constexpr static cpu_t cpu = cpu_t::avx2;
- using in_saturated<cpu_t::sse2, cc>::satadd;
- using in_saturated<cpu_t::sse2, cc>::satsub;
-
- KFR_SINTRIN u8avx satadd(u8avx x, u8avx y) { return _mm256_adds_epu8(*x, *y); }
- KFR_SINTRIN i8avx satadd(i8avx x, i8avx y) { return _mm256_adds_epi8(*x, *y); }
- KFR_SINTRIN u16avx satadd(u16avx x, u16avx y) { return _mm256_adds_epu16(*x, *y); }
- KFR_SINTRIN i16avx satadd(i16avx x, i16avx y) { return _mm256_adds_epi16(*x, *y); }
-
- KFR_SINTRIN u8avx satsub(u8avx x, u8avx y) { return _mm256_subs_epu8(*x, *y); }
- KFR_SINTRIN i8avx satsub(i8avx x, i8avx y) { return _mm256_subs_epi8(*x, *y); }
- KFR_SINTRIN u16avx satsub(u16avx x, u16avx y) { return _mm256_subs_epu16(*x, *y); }
- KFR_SINTRIN i16avx satsub(i16avx x, i16avx y) { return _mm256_subs_epi16(*x, *y); }
-
- KFR_HANDLE_ALL(satadd)
- KFR_HANDLE_ALL(satsub)
- KFR_HANDLE_SCALAR(satadd)
- KFR_HANDLE_SCALAR(satsub)
- KFR_SPEC_FN(in_saturated, satadd)
- KFR_SPEC_FN(in_saturated, satsub)
-};
+ return select(a < b, zerovector(a), a - b);
+}
+
+#if defined CID_ARCH_SSE2
+
+KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); }
+KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); }
+KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); }
+KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); }
+
+KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); }
+KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); }
+KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); }
+KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); }
+
+KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); }
+KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); }
+KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); }
+KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); }
+
+KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); }
+KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); }
+KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); }
+KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); }
+
+#if defined CID_ARCH_AVX2
+KFR_SINTRIN u8avx satadd(u8avx x, u8avx y) { return _mm256_adds_epu8(*x, *y); }
+KFR_SINTRIN i8avx satadd(i8avx x, i8avx y) { return _mm256_adds_epi8(*x, *y); }
+KFR_SINTRIN u16avx satadd(u16avx x, u16avx y) { return _mm256_adds_epu16(*x, *y); }
+KFR_SINTRIN i16avx satadd(i16avx x, i16avx y) { return _mm256_adds_epi16(*x, *y); }
+
+KFR_SINTRIN u8avx satsub(u8avx x, u8avx y) { return _mm256_subs_epu8(*x, *y); }
+KFR_SINTRIN i8avx satsub(i8avx x, i8avx y) { return _mm256_subs_epi8(*x, *y); }
+KFR_SINTRIN u16avx satsub(u16avx x, u16avx y) { return _mm256_subs_epu16(*x, *y); }
+KFR_SINTRIN i16avx satsub(i16avx x, i16avx y) { return _mm256_subs_epi16(*x, *y); }
#endif
+
+#else
+// fallback
+template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)>
+KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b)
+{
+ return saturated_signed_add(a, b);
}
-namespace native
+template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)>
+KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b)
{
-using fn_satadd = internal::in_saturated<>::fn_satadd;
+ return saturated_unsigned_add(a, b);
+}
+template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)>
+KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b)
+{
+ return saturated_signed_sub(a, b);
+}
+template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)>
+KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b)
+{
+ return saturated_unsigned_sub(a, b);
+}
+#endif
+KFR_HANDLE_SCALAR_1(satadd)
+KFR_FN(satadd)
+KFR_HANDLE_SCALAR_1(satsub)
+KFR_FN(satsub)
+}
+
template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> satadd(const T1& x, const T2& y)
+KFR_INTRIN common_type<T1, T2> satadd(const T1& x, const T2& y)
{
- return internal::in_saturated<>::satadd(x, y);
+ return internal::satadd(x, y);
}
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_satadd, E1, E2> satadd(E1&& x, E2&& y)
+KFR_INTRIN expr_func<internal::fn_satadd, E1, E2> satadd(E1&& x, E2&& y)
{
- return { fn_satadd(), std::forward<E1>(x), std::forward<E2>(y) };
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) };
}
-using fn_satsub = internal::in_saturated<>::fn_satsub;
+
template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
-KFR_INLINE ftype<common_type<T1, T2>> satsub(const T1& x, const T2& y)
+KFR_INTRIN common_type<T1, T2> satsub(const T1& x, const T2& y) // immediate form; result type is now common_type<T1, T2> rather than ftype<common_type<T1, T2>>
{
- return internal::in_saturated<>::satsub(x, y);
+ return internal::satsub(x, y); // overload resolution inside kfr::internal picks the implementation
}
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
-KFR_INLINE expr_func<fn_satsub, E1, E2> satsub(E1&& x, E2&& y)
+KFR_INTRIN expr_func<internal::fn_satsub, E1, E2> satsub(E1&& x, E2&& y) // expression form: returns an expr_func node instead of a computed value
{
- return { fn_satsub(), std::forward<E1>(x), std::forward<E2>(y) };
+ return { {}, std::forward<E1>(x), std::forward<E2>(y) }; // leading {} takes the place of the explicit fn_satsub() used before
}
}
-}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp
@@ -29,174 +29,71 @@ namespace kfr
namespace internal
{
-template <cpu_t c>
-struct in_select_impl : in_select_impl<older(c)>
-{
- struct fn_select : fn_disabled
- {
- };
-};
-
-template <>
-struct in_select_impl<cpu_t::common>
-{
- constexpr static cpu_t cur = cpu_t::common;
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> select(vec<T, N> m, vec<T, N> x, vec<T, N> y)
- {
- return y ^ ((x ^ y) & m);
- }
- KFR_SPEC_FN(in_select_impl, select)
-};
+#if defined CID_ARCH_SSE41
+
+KFR_SINTRIN u8sse select(mu8sse m, u8sse x, u8sse y) { return _mm_blendv_epi8(*y, *x, *m); } // blendv(y, x, m): takes x where the mask byte's top bit is set, y elsewhere
+KFR_SINTRIN u16sse select(mu16sse m, u16sse x, u16sse y) { return _mm_blendv_epi8(*y, *x, *m); } // byte-wise blend reused for wider lanes; assumes every byte of a mask lane agrees - confirm
+KFR_SINTRIN u32sse select(mu32sse m, u32sse x, u32sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN u64sse select(mu64sse m, u64sse x, u64sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN i8sse select(mi8sse m, i8sse x, i8sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN i16sse select(mi16sse m, i16sse x, i16sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN i32sse select(mi32sse m, i32sse x, i32sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN i64sse select(mi64sse m, i64sse x, i64sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN f32sse select(mf32sse m, f32sse x, f32sse y) { return _mm_blendv_ps(*y, *x, *m); } // float blend is keyed on the top bit of each 32-bit lane
+KFR_SINTRIN f64sse select(mf64sse m, f64sse x, f64sse y) { return _mm_blendv_pd(*y, *x, *m); } // double blend is keyed on the top bit of each 64-bit lane
+
+#if defined CID_ARCH_AVX
+KFR_SINTRIN f64avx select(mf64avx m, f64avx x, f64avx y) { return _mm256_blendv_pd(*y, *x, *m); } // 256-bit blend: x where the lane's top mask bit is set, y elsewhere
+KFR_SINTRIN f32avx select(mf32avx m, f32avx x, f32avx y) { return _mm256_blendv_ps(*y, *x, *m); } // only floating-point types get a 256-bit overload under plain AVX
+#endif
-#ifdef CID_ARCH_X86
+#if defined CID_ARCH_AVX2
+KFR_SINTRIN u8avx select(mu8avx m, u8avx x, u8avx y) { return _mm256_blendv_epi8(*y, *x, *m); } // blendv(y, x, m): takes x where the mask byte's top bit is set, y elsewhere
+KFR_SINTRIN u16avx select(mu16avx m, u16avx x, u16avx y) { return _mm256_blendv_epi8(*y, *x, *m); } // byte-wise blend reused for wider lanes; assumes every byte of a mask lane agrees - confirm
+KFR_SINTRIN u32avx select(mu32avx m, u32avx x, u32avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN u64avx select(mu64avx m, u64avx x, u64avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN i8avx select(mi8avx m, i8avx x, i8avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN i16avx select(mi16avx m, i16avx x, i16avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN i32avx select(mi32avx m, i32avx x, i32avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
+KFR_SINTRIN i64avx select(mi64avx m, i64avx x, i64avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
+#endif
-template <>
-struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::common>
+template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)>
+KFR_SINTRIN vec<T, N> select(mask<T, N> a, vec<T, N> b, vec<T, N> c) // generic overload for vectors narrower than the native width
{
- constexpr static cpu_t cpu = cpu_t::sse41;
-
- KFR_CPU_INTRIN(sse41) u8sse select(u8sse m, u8sse x, u8sse y) { return _mm_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(sse41) u16sse select(u16sse m, u16sse x, u16sse y) { return _mm_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(sse41) u32sse select(u32sse m, u32sse x, u32sse y) { return _mm_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(sse41) u64sse select(u64sse m, u64sse x, u64sse y) { return _mm_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(sse41) i8sse select(i8sse m, i8sse x, i8sse y) { return _mm_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(sse41) i16sse select(i16sse m, i16sse x, i16sse y) { return _mm_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(sse41) i32sse select(i32sse m, i32sse x, i32sse y) { return _mm_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(sse41) i64sse select(i64sse m, i64sse x, i64sse y) { return _mm_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(sse41) f32sse select(f32sse m, f32sse x, f32sse y) { return _mm_blendv_ps(*y, *x, *m); }
- KFR_CPU_INTRIN(sse41) f64sse select(f64sse m, f64sse x, f64sse y) { return _mm_blendv_pd(*y, *x, *m); }
-
- KFR_HANDLE_ALL(select)
- KFR_SPEC_FN(in_select_impl, select)
-};
-
-template <>
-struct in_select_impl<cpu_t::avx1> : in_select_impl<cpu_t::sse41>
+ return slice<0, N>(select(expand_simd(a).asmask(), expand_simd(b), expand_simd(c))); // widen all three operands with expand_simd, select at that width, then slice<0, N> back to N lanes
+}
+template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> // the extra defaulted parameter keeps this template distinct from the N < width one
+KFR_SINTRIN vec<T, N> select(mask<T, N> a, vec<T, N> b, vec<T, N> c) // generic overload for vectors at least as wide as the native width
{
- constexpr static cpu_t cpu = cpu_t::avx1;
- using in_select_impl<cpu_t::sse41>::select;
-
- KFR_CPU_INTRIN(avx) f64avx select(f64avx m, f64avx x, f64avx y) { return _mm256_blendv_pd(*y, *x, *m); }
- KFR_CPU_INTRIN(avx) f32avx select(f32avx m, f32avx x, f32avx y) { return _mm256_blendv_ps(*y, *x, *m); }
+ return concat(select(low(a).asmask(), low(b), low(c)), select(high(a).asmask(), high(b), high(c))); // select each half and concat; NOTE(review): terminates only if a non-template overload exists at the native width - confirm for every T
+}
- KFR_HANDLE_ALL(select)
- KFR_SPEC_FN(in_select_impl, select)
-};
+#else
-template <>
-struct in_select_impl<cpu_t::avx2> : in_select_impl<cpu_t::avx1>
+// fallback: portable bitwise select, compiled when CID_ARCH_SSE41 is not defined
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> select(mask<T, N> m, vec<T, N> x, vec<T, N> y) // bit-level choice between x and y under mask m
{
- constexpr static cpu_t cpu = cpu_t::avx2;
- using in_select_impl<cpu_t::avx1>::select;
-
- KFR_CPU_INTRIN(avx2) u8avx select(u8avx m, u8avx x, u8avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(avx2) u16avx select(u16avx m, u16avx x, u16avx y)
- {
- return _mm256_blendv_epi8(*y, *x, *m);
- }
- KFR_CPU_INTRIN(avx2) u32avx select(u32avx m, u32avx x, u32avx y)
- {
- return _mm256_blendv_epi8(*y, *x, *m);
- }
- KFR_CPU_INTRIN(avx2) u64avx select(u64avx m, u64avx x, u64avx y)
- {
- return _mm256_blendv_epi8(*y, *x, *m);
- }
- KFR_CPU_INTRIN(avx2) i8avx select(i8avx m, i8avx x, i8avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
- KFR_CPU_INTRIN(avx2) i16avx select(i16avx m, i16avx x, i16avx y)
- {
- return _mm256_blendv_epi8(*y, *x, *m);
- }
- KFR_CPU_INTRIN(avx2) i32avx select(i32avx m, i32avx x, i32avx y)
- {
- return _mm256_blendv_epi8(*y, *x, *m);
- }
- KFR_CPU_INTRIN(avx2) i64avx select(i64avx m, i64avx x, i64avx y)
- {
- return _mm256_blendv_epi8(*y, *x, *m);
- }
-
- KFR_HANDLE_ALL(select)
- KFR_SPEC_FN(in_select_impl, select)
-};
-
+ return y ^ ((x ^ y) & m); // every set bit of m takes the bit from x, every clear bit keeps y; unlike blendv, all mask bits matter
+}
#endif
-template <cpu_t c = cpu_t::native>
-struct in_select : in_select_impl<c>
-{
- using in_select_impl<c>::select;
-
- template <typename T, size_t N, typename M>
- KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, vec<T, N> y)
- {
- static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
- return in_select_impl<c>::select(bitcast<T>(m), x, y);
- }
- template <typename T, size_t N, typename M>
- KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, mask<T, N> y)
- {
- static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
- return in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), ref_cast<vec<T, N>>(y));
- }
-
- template <typename T, size_t N, typename M>
- KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, T y)
- {
- static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
- return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), broadcast<N>(y));
- }
-
- template <typename T, size_t N, typename M>
- KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, T y)
- {
- static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
- return in_select_impl<c>::select(bitcast<T>(m), x, broadcast<N>(y));
- }
-
- template <typename T, size_t N, typename M>
- KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, vec<T, N> y)
- {
- static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
- return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), y);
- }
- template <typename T, size_t N, typename M>
- KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, T y)
- {
- static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
- return in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), broadcast<N>(y));
- }
-
- template <typename T, size_t N, typename M>
- KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, mask<T, N> y)
- {
- static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
- return in_select_impl<c>::select(m, broadcast<N>(x), ref_cast<vec<T, N>>(y));
- }
- KFR_SPEC_FN(in_select, select)
-
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> sign(vec<T, N> x)
- {
- return select(x > T(), T(1), select(x < T(), T(-1), T(0)));
- }
-};
+KFR_I_FN(select) // presumably declares fn_select, the functor named by kfr::select's expression overload - confirm against the macro definition
}
-namespace native
+template <typename T1, size_t N, typename T2, typename T3,
+ KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value),
+ typename Tout = subtype<common_type<T2, T3>>> // Tout: element type of the result, taken from the common type of x and y
+KFR_INTRIN vec<Tout, N> select(const mask<T1, N>& m, const T2& x, const T3& y) // immediate form; the mask element type T1 may differ from Tout
{
-using fn_select = internal::in_select<>::fn_select;
-template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
-KFR_INLINE ftype<common_type<T2, T3>> select(const T1& arg1, const T2& arg2, const T3& arg3)
-{
- return internal::in_select<>::select(arg1, arg2, arg3);
+ static_assert(sizeof(T1) == sizeof(Tout), "select: incompatible types"); // the bitcast below needs mask lanes and value lanes of equal size
+ return internal::select(bitcast<Tout>(m).asmask(), static_cast<vec<Tout, N>>(x), static_cast<vec<Tout, N>>(y)); // mask is reinterpreted as Tout lanes; x and y are both converted to vec<Tout, N>
}
+
template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
-KFR_INLINE expr_func<fn_select, E1, E2, E3> select(E1&& arg1, E2&& arg2, E3&& arg3)
+KFR_INTRIN expr_func<internal::fn_select, E1, E2, E3> select(E1&& m, E2&& x, E3&& y) // expression form: returns an expr_func node instead of a computed vector
{
- return { fn_select(), std::forward<E1>(arg1), std::forward<E2>(arg2), std::forward<E3>(arg3) };
-}
+ return { {}, std::forward<E1>(m), std::forward<E2>(x), std::forward<E3>(y) }; // leading {} takes the place of the explicit fn_select() used before
}
}
diff --git a/include/kfr/base/sin_cos.hpp b/include/kfr/base/sin_cos.hpp
@@ -31,10 +31,6 @@
#include "select.hpp"
#include "shuffle.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
#if CID_HAS_WARNING("-Wc99-extensions")
#pragma clang diagnostic ignored "-Wc99-extensions"
#endif
@@ -45,517 +41,402 @@ namespace kfr
namespace internal
{
-template <cpu_t c = cpu_t::native, cpu_t cc = c>
-struct in_trig : in_select<cc>
-{
-private:
- using in_select<cc>::select;
-
-protected:
- template <typename T, size_t N>
- KFR_SINTRIN vec<T, N> mask_horner(vec<T, N>, mask<T, N> msk, T a0, T b0)
- {
- return select(msk, a0, b0);
- }
-
- template <typename T, size_t N, typename... Ts>
- KFR_SINTRIN vec<T, N> mask_horner(vec<T, N> x, mask<T, N> msk, T a0, T b0, T a1, T b1, Ts... values)
- {
- return fmadd(mask_horner(x, msk, a1, b1, values...), x, select(msk, a0, b0));
- }
-};
-
-template <cpu_t c = cpu_t::native, cpu_t cc = c>
-struct in_sin_cos : private in_trig<cc>, private in_select<cc>, private in_round<cc>, private in_abs<cc>
-{
-
-private:
- using in_abs<cc>::abs;
- using in_round<cc>::floor;
- using in_select<cc>::select;
- using in_trig<cc>::mask_horner;
-
- template <typename T, size_t N, typename Tprecise = f64>
- KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x, vec<itype<T>, N>& quadrant)
- {
- const vec<T, N> xabs = abs(x);
- constexpr vec<T, N> div = fold_constant_div<T>;
- vec<T, N> y = floor(xabs / div);
- quadrant = cast<itype<T>>(y - floor(y * T(1.0 / 16.0)) * T(16.0));
-
- const mask<T, N> msk = bitcast<T>((quadrant & 1) != 0);
- quadrant = select(msk, quadrant + 1, quadrant);
- y = select(msk, y + T(1.0), y);
- quadrant = quadrant & 7;
-
- constexpr vec<Tprecise, N> hi = cast<Tprecise>(fold_constant_hi<T>);
- constexpr vec<T, N> rem1 = fold_constant_rem1<T>;
- constexpr vec<T, N> rem2 = fold_constant_rem2<T>;
- return cast<T>(cast<Tprecise>(xabs) - cast<Tprecise>(y) * hi) - y * rem1 - y * rem2;
- }
-
- template <size_t N>
- KFR_SINTRIN vec<f32, N> trig_sincos(vec<f32, N> folded, mask<f32, N> cosmask)
- {
- constexpr f32 sin_c2 = -0x2.aaaaacp-4f;
- constexpr f32 sin_c4 = 0x2.222334p-8f;
- constexpr f32 sin_c6 = -0xd.0566ep-16f;
- constexpr f32 sin_c8 = 0x3.64cc1cp-20f;
- constexpr f32 sin_c10 = -0x5.6c4a4p-24f;
- constexpr f32 cos_c2 = -0x8.p-4f;
- constexpr f32 cos_c4 = 0xa.aaaabp-8f;
- constexpr f32 cos_c6 = -0x5.b05d48p-12f;
- constexpr f32 cos_c8 = 0x1.a065f8p-16f;
- constexpr f32 cos_c10 = -0x4.cd156p-24f;
-
- const vec<f32, N> x2 = folded * folded;
-
- vec<f32, N> formula = mask_horner(x2, cosmask, 1.0f, 1.0f, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6,
- sin_c6, cos_c8, sin_c8, cos_c10, sin_c10);
-
- formula = select(cosmask, formula, formula * folded);
- return formula;
- }
-
- template <size_t N>
- KFR_SINTRIN vec<f64, N> trig_sincos(vec<f64, N> folded, mask<f64, N> cosmask)
- {
- constexpr f64 sin_c2 = -0x2.aaaaaaaaaaaaap-4;
- constexpr f64 sin_c4 = 0x2.22222222220cep-8;
- constexpr f64 sin_c6 = -0xd.00d00cffd6618p-16;
- constexpr f64 sin_c8 = 0x2.e3bc744fb879ep-20;
- constexpr f64 sin_c10 = -0x6.b99034c1467a4p-28;
- constexpr f64 sin_c12 = 0xb.0711ea8fe8ee8p-36;
- constexpr f64 sin_c14 = -0xb.7e010897e55dp-44;
- constexpr f64 sin_c16 = -0xb.64eac07f1d6bp-48;
- constexpr f64 cos_c2 = -0x8.p-4;
- constexpr f64 cos_c4 = 0xa.aaaaaaaaaaaa8p-8;
- constexpr f64 cos_c6 = -0x5.b05b05b05ad28p-12;
- constexpr f64 cos_c8 = 0x1.a01a01a0022e6p-16;
- constexpr f64 cos_c10 = -0x4.9f93ed845de2cp-24;
- constexpr f64 cos_c12 = 0x8.f76bc015abe48p-32;
- constexpr f64 cos_c14 = -0xc.9bf2dbe00379p-40;
- constexpr f64 cos_c16 = 0xd.1232ac32f7258p-48;
-
- vec<f64, N> x2 = folded * folded;
- vec<f64, N> formula =
- mask_horner(x2, cosmask, 1.0, 1.0, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, sin_c6, cos_c8, sin_c8,
- cos_c10, sin_c10, cos_c12, sin_c12, cos_c14, sin_c14, cos_c16, sin_c16);
-
- formula = select(cosmask, formula, formula * folded);
- return formula;
- }
-
- template <typename T, size_t N, typename = u8[N > 1]>
- KFR_SINTRIN vec<T, N> sincos_mask(vec<T, N> x_full, mask<T, N> cosmask)
- {
- vec<itype<T>, N> quadrant;
- vec<T, N> folded = trig_fold(x_full, quadrant);
-
- mask<T, N> flip_sign = select(cosmask, (quadrant == 2) || (quadrant == 4), quadrant >= 4);
-
- mask<T, N> usecos = (quadrant == 2) || (quadrant == 6);
- usecos = usecos ^ cosmask;
-
- vec<T, N> formula = trig_sincos(folded, usecos);
-
- mask<T, N> negmask = x_full < 0;
-
- flip_sign = flip_sign ^ (negmask & ~cosmask);
-
- formula = select(flip_sign, -formula, formula);
- return formula;
- }
-
- template <typename T>
- constexpr static T fold_constant_div = choose_const<T>(0x1.921fb6p-1f, 0x1.921fb54442d18p-1);
-
- template <typename T>
- constexpr static T fold_constant_hi = choose_const<T>(0x1.922000p-1f, 0x1.921fb40000000p-1);
- template <typename T>
- constexpr static T fold_constant_rem1 = choose_const<T>(-0x1.2ae000p-19f, 0x1.4442d00000000p-25);
- template <typename T>
- constexpr static T fold_constant_rem2 = choose_const<T>(-0x1.de973ep-32f, 0x1.8469898cc5170p-49);
- constexpr static cpu_t cur = c;
-
-public:
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> sin(vec<T, N> x)
- {
- vec<itype<T>, N> quadrant;
- vec<T, N> folded = trig_fold(x, quadrant);
-
- mask<T, N> flip_sign = quadrant >= 4;
- mask<T, N> usecos = (quadrant == 2) || (quadrant == 6);
-
- vec<T, N> formula = trig_sincos(folded, usecos);
-
- formula = select(flip_sign ^ x.asmask(), -formula, formula);
- return formula;
- }
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> cos(vec<T, N> x)
- {
- vec<itype<T>, N> quadrant;
- vec<T, N> folded = trig_fold(x, quadrant);
-
- mask<T, N> eq4 = (quadrant == 4);
- mask<T, N> flip_sign = (quadrant == 2) || eq4;
- mask<T, N> usecos = (quadrant == 0) || eq4;
-
- vec<T, N> formula = trig_sincos(folded, usecos);
-
- formula = select(flip_sign, -formula, formula);
- return formula;
- }
-
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> fastsin(vec<T, N> x)
- {
- constexpr vec<T, N> msk = broadcast<N>(highbitmask<T>);
-
- constexpr static T c2 = -0.16665853559970855712890625;
- constexpr static T c4 = +8.31427983939647674560546875e-3;
- constexpr static T c6 = -1.85423981747590005397796630859375e-4;
-
- const vec<T, N> pi = c_pi<T>;
-
- x -= pi;
- vec<T, N> y = abs(x);
- y = select(y > c_pi<T, 1, 2>, pi - y, y);
- y = y ^ (msk & ~x);
-
- vec<T, N> y2 = y * y;
- vec<T, N> formula = c6;
- vec<T, N> y3 = y2 * y;
- formula = fmadd(formula, y2, c4);
- formula = fmadd(formula, y2, c2);
- formula = formula * y3 + y;
- return formula;
- }
-
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> fastcos(vec<T, N> x)
- {
- x += c_pi<T, 1, 2>;
- x = select(x >= c_pi<T, 2>, x - c_pi<T, 2>, x);
- return fastsin(x);
- }
- template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> sincos(vec<T, N> x)
- {
- return sincos_mask(x, internal::oddmask<T, N>());
- }
-
- template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> cossin(vec<T, N> x)
- {
- return sincos_mask(x, internal::evenmask<T, N>());
- }
-
- template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
- KFR_SINTRIN vec<T, N> sinc(vec<T, N> x)
- {
- return select(abs(x) <= c_epsilon<T>, T(1), sin(x) / x);
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> sin(vec<T, N> x)
- {
- return sin(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> cos(vec<T, N> x)
- {
- return cos(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> fastsin(vec<T, N> x)
- {
- return fastsin(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> fastcos(vec<T, N> x)
- {
- return fastcos(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> sincos(vec<T, N> x)
- {
- return sincos(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> cossin(vec<T, N> x)
- {
- return cossin(cast<Tout>(x));
- }
- template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
- KFR_SINTRIN vec<Tout, N> sinc(vec<T, N> x)
- {
- return sinc(cast<Tout>(x));
- }
-
- template <typename T>
- KFR_SINTRIN T sindeg(const T& x)
- {
- return sin(x * c_degtorad<T>);
- }
- template <typename T>
- KFR_SINTRIN T cosdeg(const T& x)
- {
- return cos(x * c_degtorad<T>);
- }
-
- template <typename T>
- KFR_SINTRIN T fastsindeg(const T& x)
- {
- return fastsin(x * c_degtorad<T>);
- }
- template <typename T>
- KFR_SINTRIN T fastcosdeg(const T& x)
- {
- return fastcos(x * c_degtorad<T>);
- }
-
- template <typename T>
- KFR_SINTRIN T sincosdeg(const T& x)
- {
- return sincos(x * c_degtorad<T>);
- }
- template <typename T>
- KFR_SINTRIN T cossindeg(const T& x)
- {
- return cossin(x * c_degtorad<T>);
- }
-
- KFR_HANDLE_SCALAR(sin)
- KFR_HANDLE_SCALAR(cos)
- KFR_HANDLE_SCALAR(fastsin)
- KFR_HANDLE_SCALAR(fastcos)
- KFR_HANDLE_SCALAR(sincos)
- KFR_HANDLE_SCALAR(cossin)
- KFR_HANDLE_SCALAR(sinc)
-
- KFR_SPEC_FN(in_sin_cos, sin)
- KFR_SPEC_FN(in_sin_cos, cos)
- KFR_SPEC_FN(in_sin_cos, fastsin)
- KFR_SPEC_FN(in_sin_cos, fastcos)
- KFR_SPEC_FN(in_sin_cos, sincos_mask)
- KFR_SPEC_FN(in_sin_cos, sincos)
- KFR_SPEC_FN(in_sin_cos, cossin)
- KFR_SPEC_FN(in_sin_cos, sinc)
- KFR_SPEC_FN(in_sin_cos, sindeg)
- KFR_SPEC_FN(in_sin_cos, cosdeg)
- KFR_SPEC_FN(in_sin_cos, fastsindeg)
- KFR_SPEC_FN(in_sin_cos, fastcosdeg)
- KFR_SPEC_FN(in_sin_cos, sincosdeg)
- KFR_SPEC_FN(in_sin_cos, cossindeg)
-};
-}
-
-namespace native
-{
-using fn_sin = internal::in_sin_cos<>::fn_sin;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sin(const T1& x)
+template <typename T>
+constexpr static T fold_constant_div = choose_const<T>(0x1.921fb6p-1f, 0x1.921fb54442d18p-1); // pi/4 as an f32 and an f64 literal; choose_const presumably picks the one matching T
+ // the next three constants add up to pi/4 again: trig_fold subtracts y*hi, y*rem1 and y*rem2 one after another
+template <typename T>
+constexpr static T fold_constant_hi = choose_const<T>(0x1.922000p-1f, 0x1.921fb40000000p-1); // leading part of pi/4 with its low mantissa bits cleared
+template <typename T>
+constexpr static T fold_constant_rem1 = choose_const<T>(-0x1.2ae000p-19f, 0x1.4442d00000000p-25); // first correction term (negative for f32 because its hi part is rounded up)
+template <typename T>
+constexpr static T fold_constant_rem2 = choose_const<T>(-0x1.de973ep-32f, 0x1.8469898cc5170p-49); // second, smaller correction term
+
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> trig_horner(vec<T, N>, mask<T, N> msk, T a0, T b0) // end of the recursion: only one coefficient pair left, the x argument is unused
{
- return internal::in_sin_cos<>::sin(x);
+ return select(msk, a0, b0); // a0 in lanes where msk is set, b0 in the others
}
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sin, E1> sin(E1&& x)
+template <typename T, size_t N, typename... Ts>
+KFR_SINTRIN vec<T, N> trig_horner(vec<T, N> x, mask<T, N> msk, T a0, T b0, T a1, T b1, Ts... values) // Horner scheme over two interleaved coefficient lists (a0, b0, a1, b1, ...), lowest order first
{
- return { fn_sin(), std::forward<E1>(x) };
+ return fmadd(trig_horner(x, msk, a1, b1, values...), x, select(msk, a0, b0)); // (higher-order part) * x + c0, assuming fmadd(a, b, c) is a * b + c; c0 is a0 or b0 per lane
}
-using fn_cos = internal::in_sin_cos<>::fn_cos;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cos(const T1& x)
+template <typename T, size_t N, typename Tprecise = f64>
+KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x, vec<itype<T>, N>& quadrant) // range reduction: returns |x| minus a multiple of pi/2 and writes that multiple's index (0, 2, 4 or 6) to quadrant
{
- return internal::in_sin_cos<>::cos(x);
+ const vec<T, N> xabs = abs(x);
+ constexpr vec<T, N> div = fold_constant_div<T>;
+ vec<T, N> y = floor(xabs / div); // whole number of pi/4 steps contained in |x|
+ quadrant = cast<itype<T>>(y - floor(y * T(1.0 / 16.0)) * T(16.0)); // y modulo 16, converted to an integer
+ // odd step counts are bumped to the next even one, so the subtracted angle is a multiple of pi/2
+ const mask<T, N> msk = bitcast<T>((quadrant & 1) != 0);
+ quadrant = kfr::select(msk, quadrant + 1, quadrant); // public kfr::select: the mask has T lanes but the values are itype<T>
+ y = select(msk, y + T(1.0), y);
+ quadrant = quadrant & 7; // wraps 8 and 16 back to 0, leaving only 0, 2, 4, 6
+ // subtract y * pi/4 in three pieces; the largest product is formed in Tprecise (f64 by default)
+ constexpr vec<Tprecise, N> hi = cast<Tprecise>(fold_constant_hi<T>);
+ constexpr vec<T, N> rem1 = fold_constant_rem1<T>;
+ constexpr vec<T, N> rem2 = fold_constant_rem2<T>;
+ return cast<T>(cast<Tprecise>(xabs) - cast<Tprecise>(y) * hi) - y * rem1 - y * rem2;
}
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cos, E1> cos(E1&& x)
+template <size_t N>
+KFR_SINTRIN vec<f32, N> trig_sincos(vec<f32, N> folded, mask<f32, N> cosmask) // per lane: cosine polynomial where cosmask is set, sine polynomial elsewhere
{
- return { fn_cos(), std::forward<E1>(x) };
+ constexpr f32 sin_c2 = -0x2.aaaaacp-4f; // about -1/6; the sin_c* values multiply folded^2 powers and the result is scaled by folded below
+ constexpr f32 sin_c4 = 0x2.222334p-8f;
+ constexpr f32 sin_c6 = -0xd.0566ep-16f;
+ constexpr f32 sin_c8 = 0x3.64cc1cp-20f;
+ constexpr f32 sin_c10 = -0x5.6c4a4p-24f;
+ constexpr f32 cos_c2 = -0x8.p-4f; // exactly -1/2
+ constexpr f32 cos_c4 = 0xa.aaaabp-8f;
+ constexpr f32 cos_c6 = -0x5.b05d48p-12f;
+ constexpr f32 cos_c8 = 0x1.a065f8p-16f;
+ constexpr f32 cos_c10 = -0x4.cd156p-24f;
+
+ const vec<f32, N> x2 = folded * folded; // both polynomials are evaluated in folded^2
+
+ vec<f32, N> formula = trig_horner(x2, cosmask, 1.0f, 1.0f, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, sin_c6,
+ cos_c8, sin_c8, cos_c10, sin_c10);
+
+ formula = select(cosmask, formula, formula * folded); // sine lanes get the final factor of folded, cosine lanes are kept as is
+ return formula;
}
-using fn_fastsin = internal::in_sin_cos<>::fn_fastsin;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> fastsin(const T1& x)
+
+template <size_t N>
+KFR_SINTRIN vec<f64, N> trig_sincos(vec<f64, N> folded, mask<f64, N> cosmask) // f64 version: same scheme as the f32 overload with polynomials up to the 16th power
{
- return internal::in_sin_cos<>::fastsin(x);
+ constexpr f64 sin_c2 = -0x2.aaaaaaaaaaaaap-4; // about -1/6
+ constexpr f64 sin_c4 = 0x2.22222222220cep-8;
+ constexpr f64 sin_c6 = -0xd.00d00cffd6618p-16;
+ constexpr f64 sin_c8 = 0x2.e3bc744fb879ep-20;
+ constexpr f64 sin_c10 = -0x6.b99034c1467a4p-28;
+ constexpr f64 sin_c12 = 0xb.0711ea8fe8ee8p-36;
+ constexpr f64 sin_c14 = -0xb.7e010897e55dp-44;
+ constexpr f64 sin_c16 = -0xb.64eac07f1d6bp-48; // NOTE(review): negative, unlike the alternating Taylor term - presumably a fitted coefficient, confirm
+ constexpr f64 cos_c2 = -0x8.p-4; // exactly -1/2
+ constexpr f64 cos_c4 = 0xa.aaaaaaaaaaaa8p-8;
+ constexpr f64 cos_c6 = -0x5.b05b05b05ad28p-12;
+ constexpr f64 cos_c8 = 0x1.a01a01a0022e6p-16;
+ constexpr f64 cos_c10 = -0x4.9f93ed845de2cp-24;
+ constexpr f64 cos_c12 = 0x8.f76bc015abe48p-32;
+ constexpr f64 cos_c14 = -0xc.9bf2dbe00379p-40;
+ constexpr f64 cos_c16 = 0xd.1232ac32f7258p-48;
+
+ vec<f64, N> x2 = folded * folded; // both polynomials are evaluated in folded^2
+ vec<f64, N> formula =
+ trig_horner(x2, cosmask, 1.0, 1.0, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, sin_c6, cos_c8, sin_c8,
+ cos_c10, sin_c10, cos_c12, sin_c12, cos_c14, sin_c14, cos_c16, sin_c16);
+
+ formula = select(cosmask, formula, formula * folded); // sine lanes get the final factor of folded, cosine lanes are kept as is
+ return formula;
}
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_fastsin, E1> fastsin(E1&& x)
+template <typename T, size_t N, typename = u8[N > 1]> // u8[N > 1] appears to act as a constraint: the array type has zero length unless N > 1
+KFR_SINTRIN vec<T, N> sincos_mask(vec<T, N> x_full, mask<T, N> cosmask) // computes cos(x) in lanes where cosmask is set and sin(x) in all other lanes
{
- return { fn_fastsin(), std::forward<E1>(x) };
+ vec<itype<T>, N> quadrant;
+ vec<T, N> folded = trig_fold(x_full, quadrant);
+ // sign rule per lane: the one used by cos() for cosine lanes, the one used by sin() for sine lanes
+ mask<T, N> flip_sign = select(cosmask, (quadrant == 2) || (quadrant == 4), quadrant >= 4);
+
+ mask<T, N> usecos = (quadrant == 2) || (quadrant == 6); // polynomial choice for a sine lane ...
+ usecos = usecos ^ cosmask; // ... inverted for cosine lanes, which use the cosine polynomial at 0 and 4 instead
+
+ vec<T, N> formula = trig_sincos(folded, usecos);
+
+ mask<T, N> negmask = x_full < 0;
+ // the fold worked on |x|: negative inputs flip the sign of sine lanes only, cosine being even
+ flip_sign = flip_sign ^ (negmask & ~cosmask);
+
+ formula = select(flip_sign, -formula, formula);
+ return formula;
}
-using fn_fastcos = internal::in_sin_cos<>::fn_fastcos;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> fastcos(const T1& x)
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> sin(vec<T, N> x) // sine for floating-point vectors: fold the argument, evaluate a polynomial, restore the sign
{
- return internal::in_sin_cos<>::fastcos(x);
+ vec<itype<T>, N> quadrant;
+ vec<T, N> folded = trig_fold(x, quadrant); // folded = |x| - k*pi/2 with quadrant = 2k
+
+ mask<T, N> flip_sign = quadrant >= 4; // k = 2, 3: sin(|x|) is -sin(folded) resp. -cos(folded)
+ mask<T, N> usecos = (quadrant == 2) || (quadrant == 6); // k = 1, 3: sin(folded + k*pi/2) is +-cos(folded)
+
+ vec<T, N> formula = trig_sincos(folded, usecos);
+ // x.asmask() adds x's raw bits; only its sign bit has an effect since -formula and formula differ in that bit alone
+ formula = select(flip_sign ^ x.asmask(), -formula, formula);
+ return formula;
}
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_fastcos, E1> fastcos(E1&& x)
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> cos(vec<T, N> x) // cosine for floating-point vectors; the sign of x is never consulted because the fold works on |x|
{
- return { fn_fastcos(), std::forward<E1>(x) };
+ vec<itype<T>, N> quadrant;
+ vec<T, N> folded = trig_fold(x, quadrant); // folded = |x| - k*pi/2 with quadrant = 2k
+
+ mask<T, N> eq4 = (quadrant == 4);
+ mask<T, N> flip_sign = (quadrant == 2) || eq4; // k = 1: -sin(folded), k = 2: -cos(folded)
+ mask<T, N> usecos = (quadrant == 0) || eq4; // k = 0, 2: +-cos(folded); k = 1, 3 use the sine polynomial
+
+ vec<T, N> formula = trig_sincos(folded, usecos);
+
+ formula = select(flip_sign, -formula, formula);
+ return formula;
}
-using fn_sincos_mask = internal::in_sin_cos<>::fn_sincos_mask;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sincos_mask(const T1& x)
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> fastsin(vec<T, N> x) // short-polynomial sine; NOTE(review): the folding below only covers |x - pi| <= pi, i.e. x in [0, 2*pi] - confirm callers
+{
+ constexpr vec<T, N> msk = broadcast<N>(internal::highbitmask<T>); // presumably just the sign bit of T - confirm
+
+ constexpr static T c2 = -0.16665853559970855712890625;
+ constexpr static T c4 = +8.31427983939647674560546875e-3;
+ constexpr static T c6 = -1.85423981747590005397796630859375e-4;
+
+ const vec<T, N> pi = c_pi<T>;
+ // work with x - pi, using sin(x) = -sin(x - pi)
+ x -= pi;
+ vec<T, N> y = abs(x);
+ y = select(y > c_pi<T, 1, 2>, pi - y, y); // mirror into the first quarter period, assuming c_pi<T, 1, 2> is pi/2
+ y = y ^ (msk & ~x); // set y's sign bit where the shifted x is non-negative, which applies the minus sign above
+ // result = y + y^3 * (c2 + c4*y^2 + c6*y^4)
+ vec<T, N> y2 = y * y;
+ vec<T, N> formula = c6;
+ vec<T, N> y3 = y2 * y;
+ formula = fmadd(formula, y2, c4);
+ formula = fmadd(formula, y2, c2);
+ formula = formula * y3 + y;
+ return formula;
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> fastcos(vec<T, N> x) // cosine expressed through fastsin with a shifted argument
{
- return internal::in_sin_cos<>::sincos_mask(x);
+ x += c_pi<T, 1, 2>; // shift the argument; c_pi<T, 1, 2> is presumably pi/2 - confirm
+ x = select(x >= c_pi<T, 2>, x - c_pi<T, 2>, x); // a single wrap-around step by c_pi<T, 2> (presumably 2*pi)
+ return fastsin(x);
}
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sincos_mask, E1> sincos_mask(E1&& x)
+template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> sincos(vec<T, N> x) // mixed sine/cosine vector; needs at least two lanes
{
- return { fn_sincos_mask(), std::forward<E1>(x) };
+ return sincos_mask(x, internal::oddmask<T, N>()); // lanes set in oddmask get cos, the rest sin; presumably the odd-indexed lanes - confirm
}
-using fn_sincos = internal::in_sin_cos<>::fn_sincos;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sincos(const T1& x)
+template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> cossin(vec<T, N> x) // counterpart of sincos with the cosine lanes chosen by evenmask
{
- return internal::in_sin_cos<>::sincos(x);
+ return sincos_mask(x, internal::evenmask<T, N>()); // lanes set in evenmask get cos, the rest sin; presumably the even-indexed lanes - confirm
}
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sincos, E1> sincos(E1&& x)
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_SINTRIN vec<T, N> sinc(vec<T, N> x) // sin(x) / x, with 1 substituted where |x| <= c_epsilon<T>
{
- return { fn_sincos(), std::forward<E1>(x) };
+ return select(abs(x) <= c_epsilon<T>, T(1), sin(x) / x); // the quotient is computed for every lane; select only discards it near zero
}
-using fn_cossin = internal::in_sin_cos<>::fn_cossin;
-template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cossin(const T1& x)
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> // this family handles non-floating-point T by casting to ftype<T> first
+KFR_SINTRIN vec<Tout, N> sin(vec<T, N> x)
{
- return internal::in_sin_cos<>::cossin(x);
+ return sin(cast<Tout>(x)); // after the cast the floating-point overload is chosen
}
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cossin, E1> cossin(E1&& x)
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> cos(vec<T, N> x)
+{
+ return cos(cast<Tout>(x));
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> fastsin(vec<T, N> x)
{
- return { fn_cossin(), std::forward<E1>(x) };
+ return fastsin(cast<Tout>(x));
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> fastcos(vec<T, N> x)
+{
+ return fastcos(cast<Tout>(x));
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> sincos(vec<T, N> x) // unlike the floating-point overload this one carries no N > 1 condition
+{
+ return sincos(cast<Tout>(x));
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> cossin(vec<T, N> x) // unlike the floating-point overload this one carries no N > 1 condition
+{
+ return cossin(cast<Tout>(x));
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+KFR_SINTRIN vec<Tout, N> sinc(vec<T, N> x)
+{
+ return sinc(cast<Tout>(x));
+}
+
+template <typename T>
+KFR_SINTRIN T sindeg(const T& x) // degree-argument variant: scales x by c_degtorad<T> (presumably pi/180 - confirm) and calls sin
+{
+ return sin(x * c_degtorad<T>); // the result is returned as T, the argument's own type
+}
+
+template <typename T>
+KFR_SINTRIN T cosdeg(const T& x) // same scaling, forwarded to cos
+{
+ return cos(x * c_degtorad<T>);
+}
+
+template <typename T>
+KFR_SINTRIN T fastsindeg(const T& x) // same scaling, forwarded to fastsin
+{
+ return fastsin(x * c_degtorad<T>);
+}
+
+template <typename T>
+KFR_SINTRIN T fastcosdeg(const T& x) // same scaling, forwarded to fastcos
+{
+ return fastcos(x * c_degtorad<T>);
+}
+
+template <typename T>
+KFR_SINTRIN T sincosdeg(const T& x) // same scaling, forwarded to sincos
+{
+ return sincos(x * c_degtorad<T>);
+}
+
+template <typename T>
+KFR_SINTRIN T cossindeg(const T& x) // same scaling, forwarded to cossin
+{
+ return cossin(x * c_degtorad<T>);
+}
+
+KFR_HANDLE_SCALAR_1(sin) // presumably adds scalar-argument handling for each listed function - confirm against the macro definition
+KFR_HANDLE_SCALAR_1(cos)
+KFR_HANDLE_SCALAR_1(fastsin)
+KFR_HANDLE_SCALAR_1(fastcos)
+KFR_HANDLE_SCALAR_1(sincos)
+KFR_HANDLE_SCALAR_1(cossin)
+KFR_HANDLE_SCALAR_1(sinc)
+ // the *deg helpers defined in this namespace are not registered in either list
+KFR_FN(sin) // presumably declares fn_sin and so on, the functors named by the public expression overloads - confirm
+KFR_FN(cos)
+KFR_FN(fastsin)
+KFR_FN(fastcos)
+KFR_FN(sincos)
+KFR_FN(cossin)
+KFR_FN(sinc)
}
-using fn_sindeg = internal::in_sin_cos<>::fn_sindeg;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sindeg(const T1& x)
+KFR_INTRIN ftype<T1> sin(const T1& x) // public immediate form; the result type is ftype<T1>
{
- return internal::in_sin_cos<>::sindeg(x);
+ return internal::sin(x); // overload resolution inside kfr::internal picks the implementation
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sindeg, E1> sindeg(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sin, E1> sin(E1&& x) // public expression form: returns an expr_func node instead of a computed value
{
- return { fn_sindeg(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // leading {} initializes the node's first member in place of an explicit functor object
}
-using fn_cosdeg = internal::in_sin_cos<>::fn_cosdeg;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cosdeg(const T1& x)
+KFR_INTRIN ftype<T1> cos(const T1& x) // public scalar overload, enabled only when is_numeric<T1> holds; result type is ftype<T1>
{
- return internal::in_sin_cos<>::cosdeg(x);
+ return internal::cos(x); // forwards to the free function in namespace internal
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cosdeg, E1> cosdeg(E1&& x)
+KFR_INTRIN expr_func<internal::fn_cos, E1> cos(E1&& x) // expression overload: returns an expr_func wrapping x rather than a computed value
{
- return { fn_cosdeg(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} stands in for a value-initialised internal::fn_cos; x is perfectly forwarded
}
-using fn_fastsindeg = internal::in_sin_cos<>::fn_fastsindeg;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> fastsindeg(const T1& x)
+KFR_INTRIN ftype<T1> fastsin(const T1& x) // public scalar overload, enabled only when is_numeric<T1> holds; result type is ftype<T1>
{
- return internal::in_sin_cos<>::fastsindeg(x);
+ return internal::fastsin(x); // forwards to the free function in namespace internal
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_fastsindeg, E1> fastsindeg(E1&& x)
+KFR_INTRIN expr_func<internal::fn_fastsin, E1> fastsin(E1&& x) // expression overload: returns an expr_func wrapping x rather than a computed value
{
- return { fn_fastsindeg(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} stands in for a value-initialised internal::fn_fastsin; x is perfectly forwarded
}
-using fn_fastcosdeg = internal::in_sin_cos<>::fn_fastcosdeg;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> fastcosdeg(const T1& x)
+KFR_INTRIN ftype<T1> fastcos(const T1& x) // public scalar overload, enabled only when is_numeric<T1> holds; result type is ftype<T1>
{
- return internal::in_sin_cos<>::fastcosdeg(x);
+ return internal::fastcos(x); // forwards to the free function in namespace internal
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_fastcosdeg, E1> fastcosdeg(E1&& x)
+KFR_INTRIN expr_func<internal::fn_fastcos, E1> fastcos(E1&& x) // expression overload: returns an expr_func wrapping x rather than a computed value
{
- return { fn_fastcosdeg(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} stands in for a value-initialised internal::fn_fastcos; x is perfectly forwarded
}
-using fn_sincosdeg = internal::in_sin_cos<>::fn_sincosdeg;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sincosdeg(const T1& x)
+KFR_INTRIN ftype<T1> sincos(const T1& x) // public scalar overload, enabled only when is_numeric<T1> holds; result type is ftype<T1>
{
- return internal::in_sin_cos<>::sincosdeg(x);
+ return internal::sincos(x); // forwards to the free function in namespace internal
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sincosdeg, E1> sincosdeg(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sincos, E1> sincos(E1&& x) // expression overload: returns an expr_func wrapping x rather than a computed value
{
- return { fn_sincosdeg(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} stands in for a value-initialised internal::fn_sincos; x is perfectly forwarded
}
-using fn_cossindeg = internal::in_sin_cos<>::fn_cossindeg;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> cossindeg(const T1& x)
+KFR_INTRIN ftype<T1> cossin(const T1& x) // public scalar overload, enabled only when is_numeric<T1> holds; result type is ftype<T1>
{
- return internal::in_sin_cos<>::cossindeg(x);
+ return internal::cossin(x); // forwards to the free function in namespace internal
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_cossindeg, E1> cossindeg(E1&& x)
+KFR_INTRIN expr_func<internal::fn_cossin, E1> cossin(E1&& x) // expression overload: returns an expr_func wrapping x rather than a computed value
{
- return { fn_cossindeg(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} stands in for a value-initialised internal::fn_cossin; x is perfectly forwarded
}
-using fn_sinc = internal::in_sin_cos<>::fn_sinc;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> sinc(const T1& x)
{
- return internal::in_sin_cos<>::sinc(x);
+ return internal::sinc(x); // public scalar sinc: forwards to the free function in namespace internal
}
+
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sinc, E1> sinc(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sinc, E1> sinc(E1&& x) // expression overload: returns an expr_func wrapping x rather than a computed value
{
return { {}, std::forward<E1>(x) };
}
template <typename T>
-inline T sin2x(const T& sinx, const T& cosx)
+KFR_SINTRIN T sin2x(const T& sinx, const T& cosx) // sin(2x) from sin(x) and cos(x): 2 * sin(x) * cos(x)
{
return 2 * sinx * cosx;
}
+
template <typename T>
-inline T sin3x(const T& sinx, const T& cosx)
+KFR_SINTRIN T sin3x(const T& sinx, const T& cosx) // sin(3x) from sin(x) and cos(x): sin(x) * (4 * cos(x)^2 - 1)
{
return sinx * (-1 + 4 * sqr(cosx));
}
template <typename T>
-inline T cos2x(const T& sinx, const T& cosx)
+KFR_SINTRIN T cos2x(const T& sinx, const T& cosx) // cos(2x) from sin(x) and cos(x): cos(x)^2 - sin(x)^2
{
return sqr(cosx) - sqr(sinx);
}
+
template <typename T>
-inline T cos3x(const T& sinx, const T& cosx)
+KFR_SINTRIN T cos3x(const T& sinx, const T& cosx) // cos(3x) from sin(x) and cos(x): cos(x) * (1 - 4 * sin(x)^2)
{
return cosx * (1 - 4 * sqr(sinx));
}
}
-}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/base/sqrt.hpp b/include/kfr/base/sqrt.hpp
@@ -30,77 +30,42 @@ namespace kfr
namespace internal
{
-template <cpu_t c = cpu_t::native>
-struct in_sqrt : in_sqrt<older(c)>
-{
- struct fn_sqrt : fn_disabled
- {
- };
-};
-
-template <>
-struct in_sqrt<cpu_t::common>
-{
- constexpr static cpu_t cpu = cpu_t::common;
-
- template <size_t N>
- KFR_SINTRIN vec<f32, N> sqrt(vec<f32, N> x)
- {
- return apply([](float xx) { return std::sqrt(xx); }, x);
- }
- template <size_t N>
- KFR_SINTRIN vec<f64, N> sqrt(vec<f64, N> x)
- {
- return apply([](double xx) { return std::sqrt(xx); }, x);
- }
+#if defined CID_ARCH_SSE2 // intrinsic-based overloads, compiled only when the SSE2 arch macro is defined
- KFR_HANDLE_SCALAR(sqrt)
- KFR_SPEC_FN(in_sqrt, sqrt)
-};
+KFR_SINTRIN f32x1 sqrt(f32x1 x) { return slice<0, 1>(tovec(_mm_sqrt_ss(*extend<4>(x)))); } // 1 x f32: extend to 4 lanes, _mm_sqrt_ss, keep element 0
+KFR_SINTRIN f64x1 sqrt(f64x1 x) { return slice<0, 1>(tovec(_mm_sqrt_sd(_mm_setzero_pd(), *extend<2>(x)))); } // 1 x f64: extend to 2 lanes, _mm_sqrt_sd, keep element 0
+KFR_SINTRIN f32sse sqrt(f32sse x) { return _mm_sqrt_ps(*x); } // packed f32 via _mm_sqrt_ps
+KFR_SINTRIN f64sse sqrt(f64sse x) { return _mm_sqrt_pd(*x); } // packed f64 via _mm_sqrt_pd
-#ifdef CID_ARCH_X86
-
-template <>
-struct in_sqrt<cpu_t::sse2>
-{
- constexpr static cpu_t cpu = cpu_t::sse2;
+#if defined CID_ARCH_AVX // 256-bit overloads, added only when the AVX arch macro is defined as well
+KFR_SINTRIN f32avx sqrt(f32avx x) { return _mm256_sqrt_ps(*x); } // packed f32 via _mm256_sqrt_ps
+KFR_SINTRIN f64avx sqrt(f64avx x) { return _mm256_sqrt_pd(*x); } // packed f64 via _mm256_sqrt_pd
+#endif
- KFR_SINTRIN f32sse sqrt(f32sse x) { return _mm_sqrt_ps(*x); }
- KFR_SINTRIN f64sse sqrt(f64sse x) { return _mm_sqrt_pd(*x); }
+KFR_HANDLE_ALL_SIZES_1(sqrt) // NOTE(review): macro defined outside this chunk; presumably maps the remaining vector sizes onto the overloads above
- KFR_HANDLE_ALL(sqrt)
- KFR_HANDLE_SCALAR(sqrt)
- KFR_SPEC_FN(in_sqrt, sqrt)
-};
+#else // no SSE2: generic implementation for every vec<T, N>
-template <>
-struct in_sqrt<cpu_t::avx1> : in_sqrt<cpu_t::sse2>
+// fallback
+template <typename T, size_t N>
+KFR_SINTRIN vec<T, N> sqrt(vec<T, N> x)
{
- constexpr static cpu_t cpu = cpu_t::avx1;
- using in_sqrt<cpu_t::sse2>::sqrt;
-
- KFR_SINTRIN f32avx KFR_USE_CPU(avx) sqrt(f32avx x) { return _mm256_sqrt_ps(*x); }
- KFR_SINTRIN f64avx KFR_USE_CPU(avx) sqrt(f64avx x) { return _mm256_sqrt_pd(*x); }
-
- KFR_HANDLE_ALL(sqrt)
- KFR_HANDLE_SCALAR(sqrt)
- KFR_SPEC_FN(in_sqrt, sqrt)
-};
+ return apply([](T x) { return std::sqrt(x); }, x); // std::sqrt passed through apply; the lambda parameter x shadows the vector argument x
+}
#endif
+KFR_HANDLE_SCALAR_1(sqrt) // NOTE(review): macro defined outside this chunk; presumably adds the scalar-argument overload
+KFR_FN(sqrt) // NOTE(review): presumably declares fn_sqrt; internal::fn_sqrt is what the public expression overload of sqrt names
}
-namespace native
-{
-using fn_sqrt = internal::in_sqrt<>::fn_sqrt;
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> sqrt(const T1& x)
{
- return internal::in_sqrt<>::sqrt(x);
+ return internal::sqrt(x); // public scalar sqrt: forwards to the free function in namespace internal
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sqrt, E1> sqrt(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sqrt, E1> sqrt(E1&& x) // expression overload: returns an expr_func wrapping x rather than a computed value
{
- return { fn_sqrt(), std::forward<E1>(x) };
-}
+ return { {}, std::forward<E1>(x) }; // {} stands in for a value-initialised internal::fn_sqrt; x is perfectly forwarded
}
}
diff --git a/include/kfr/base/tan.hpp b/include/kfr/base/tan.hpp
@@ -28,156 +28,129 @@
#include "select.hpp"
#include "sin_cos.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-#if CID_HAS_WARNING("-Wc99-extensions")
-#pragma clang diagnostic ignored "-Wc99-extensions"
-#endif
-
namespace kfr
{
namespace internal
{
-template <cpu_t c = cpu_t::native, cpu_t cc = c>
-struct in_tan : in_trig<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
+template <typename T, size_t N, typename IT = itype<T>>
+KFR_SINTRIN vec<T, N> trig_fold_simple(vec<T, N> x_full, mask<T, N>& inverse)
+{
+    // Range reduction for tan(): folds |x_full| by multiples of pi/4 and flags, through
+    // `inverse`, the lanes whose octant index is 1 or 2 modulo 4.
+    constexpr T quarter_pi = c_pi<T, 1, 4>;
+    const vec<T, N> magnitude = abs(x_full); // the sign of x_full is not used here
+
+    // Octant index of every lane, kept both in floating point and as an integer.
+    const vec<T, N> octant_real = floor(magnitude / quarter_pi);
+    const vec<IT, N> octant = cast<IT>(octant_real);
+
+    // Offset inside the octant; lanes in odd octants are lowered by a further pi/4.
+    const vec<T, N> offset = magnitude - octant_real * quarter_pi;
+    const mask<T, N> odd_octant = (octant & 1) != 0;
+    const vec<T, N> folded = select(odd_octant, offset - quarter_pi, offset);
+    const vec<IT, N> octant_mod4 = octant & 3;
+    inverse = (octant_mod4 == 1) || (octant_mod4 == 2);
+    return folded;
+}
+
+template <size_t N>
+KFR_SINTRIN vec<f32, N> tan(vec<f32, N> x_full) // single-precision tangent of every lane of x_full
{
-private:
- using in_abs<cc>::abs;
- using in_round<cc>::floor;
- using in_select<cc>::select;
- using in_trig<cc>::mask_horner;
-
- template <typename T, size_t N, typename IT = itype<T>>
- KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x_full, mask<T, N>& inverse)
- {
- constexpr T pi_14 = c_pi<T, 1, 4>;
-
- vec<T, N> y = abs(x_full);
- vec<T, N> scaled = y / pi_14;
-
- vec<T, N> k_real = floor(scaled);
- vec<IT, N> k = cast<IT>(k_real);
-
- vec<T, N> x = y - k_real * pi_14;
-
- mask<T, N> need_offset = (k & 1) != 0;
- x = select(need_offset, x - pi_14, x);
-
- vec<IT, N> k_mod4 = k & 3;
- inverse = (k_mod4 == 1) || (k_mod4 == 2);
- return x;
- }
-
-public:
- template <size_t N>
- KFR_SINTRIN vec<f32, N> tan(vec<f32, N> x_full)
- {
- mask<f32, N> inverse;
- const vec<f32, N> x = trig_fold(x_full, inverse);
-
- constexpr f32 tan_c2 = 0x5.555378p-4;
- constexpr f32 tan_c4 = 0x2.225bb8p-4;
- constexpr f32 tan_c6 = 0xd.ac3fep-8;
- constexpr f32 tan_c8 = 0x6.41644p-8;
- constexpr f32 tan_c10 = 0xc.bfe7ep-12;
- constexpr f32 tan_c12 = 0x2.6754dp-8;
-
- constexpr f32 cot_c2 = -0x5.555558p-4;
- constexpr f32 cot_c4 = -0x5.b0581p-8;
- constexpr f32 cot_c6 = -0x8.ac5ccp-12;
- constexpr f32 cot_c8 = -0xd.aaa01p-16;
- constexpr f32 cot_c10 = -0x1.a9a9b4p-16;
- constexpr f32 cot_c12 = -0x6.f7d4dp-24;
-
- const vec<f32, N> x2 = x * x;
- const vec<f32, N> val = mask_horner(x2, inverse, 1.0f, 1.0f, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6,
- tan_c6, cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12);
-
- const vec<f32, N> z = select(inverse, val / -x, val * x);
- return mulsign(z, x_full);
- }
-
- template <size_t N>
- KFR_SINTRIN vec<f64, N> tan(vec<f64, N> x_full)
- {
- mask<f64, N> inverse;
- const vec<f64, N> x = trig_fold(x_full, inverse);
-
- constexpr f64 tan_c2 = 0x5.5555554d8e5b8p-4;
- constexpr f64 tan_c4 = 0x2.222224820264p-4;
- constexpr f64 tan_c6 = 0xd.d0d90de32b3e8p-8;
- constexpr f64 tan_c8 = 0x5.99723bdcf5cacp-8;
- constexpr f64 tan_c10 = 0x2.434a142e413ap-8;
- constexpr f64 tan_c12 = 0xf.2b59061305efp-12;
- constexpr f64 tan_c14 = 0x4.a12565071a664p-12;
- constexpr f64 tan_c16 = 0x4.dada3797ac1bcp-12;
- constexpr f64 tan_c18 = -0x1.a74976b6ea3f3p-12;
- constexpr f64 tan_c20 = 0x1.d06a5ae5e4a74p-12;
-
- constexpr f64 cot_c2 = -0x5.5555555555554p-4;
- constexpr f64 cot_c4 = -0x5.b05b05b05b758p-8;
- constexpr f64 cot_c6 = -0x8.ab355dffc79a8p-12;
- constexpr f64 cot_c8 = -0xd.debbca405c9f8p-16;
- constexpr f64 cot_c10 = -0x1.66a8edb99b15p-16;
- constexpr f64 cot_c12 = -0x2.450239be0ee92p-20;
- constexpr f64 cot_c14 = -0x3.ad6ddb4719438p-24;
- constexpr f64 cot_c16 = -0x5.ff4c42741356p-28;
- constexpr f64 cot_c18 = -0x9.06881bcdf3108p-32;
- constexpr f64 cot_c20 = -0x1.644abedc113cap-32;
-
- const vec<f64, N> x2 = x * x;
- const vec<f64, N> val =
- mask_horner(x2, inverse, 1.0, 1.0, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, tan_c6, cot_c8, tan_c8,
- cot_c10, tan_c10, cot_c12, tan_c12, cot_c14, tan_c14, cot_c16, tan_c16, cot_c18,
- tan_c18, cot_c20, tan_c20);
-
- const vec<f64, N> z = select(inverse, val / -x, val * x);
- return mulsign(z, x_full);
- }
- template <typename T>
- KFR_SINTRIN T tandeg(const T& x)
- {
- return tan(x * c_degtorad<T>);
- }
-
- KFR_HANDLE_SCALAR(tan)
- KFR_SPEC_FN(in_tan, tan)
- KFR_SPEC_FN(in_tan, tandeg)
-};
+ mask<f32, N> inverse; // filled in by trig_fold_simple
+ const vec<f32, N> x = trig_fold_simple(x_full, inverse); // folded argument, computed from |x_full|
+
+ constexpr f32 tan_c2 = 0x5.555378p-4; // tan_c*: hexadecimal-float coefficients passed for the non-inverse lanes
+ constexpr f32 tan_c4 = 0x2.225bb8p-4;
+ constexpr f32 tan_c6 = 0xd.ac3fep-8;
+ constexpr f32 tan_c8 = 0x6.41644p-8;
+ constexpr f32 tan_c10 = 0xc.bfe7ep-12;
+ constexpr f32 tan_c12 = 0x2.6754dp-8;
+
+ constexpr f32 cot_c2 = -0x5.555558p-4; // cot_c*: hexadecimal-float coefficients passed for the inverse lanes
+ constexpr f32 cot_c4 = -0x5.b0581p-8;
+ constexpr f32 cot_c6 = -0x8.ac5ccp-12;
+ constexpr f32 cot_c8 = -0xd.aaa01p-16;
+ constexpr f32 cot_c10 = -0x1.a9a9b4p-16;
+ constexpr f32 cot_c12 = -0x6.f7d4dp-24;
+
+ const vec<f32, N> x2 = x * x; // the polynomial variable is x squared
+ const vec<f32, N> val = trig_horner(x2, inverse, 1.0f, 1.0f, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6,
+ tan_c6, cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12); // NOTE(review): trig_horner is defined outside this chunk; presumably picks the cot_* or tan_* coefficient of each pair per lane from `inverse`
+
+ const vec<f32, N> z = select(inverse, val / -x, val * x); // inverse lanes: val / -x, other lanes: val * x
+ return mulsign(z, x_full); // NOTE(review): presumably transfers the sign of x_full onto z, since the fold worked on |x_full|
}
-namespace native
+template <size_t N>
+KFR_SINTRIN vec<f64, N> tan(vec<f64, N> x_full) // double-precision tangent of every lane of x_full
{
-using fn_tan = internal::in_tan<>::fn_tan;
+ mask<f64, N> inverse; // filled in by trig_fold_simple
+ const vec<f64, N> x = trig_fold_simple(x_full, inverse); // folded argument, computed from |x_full|
+
+ constexpr f64 tan_c2 = 0x5.5555554d8e5b8p-4; // tan_c*: hexadecimal-float coefficients passed for the non-inverse lanes
+ constexpr f64 tan_c4 = 0x2.222224820264p-4;
+ constexpr f64 tan_c6 = 0xd.d0d90de32b3e8p-8;
+ constexpr f64 tan_c8 = 0x5.99723bdcf5cacp-8;
+ constexpr f64 tan_c10 = 0x2.434a142e413ap-8;
+ constexpr f64 tan_c12 = 0xf.2b59061305efp-12;
+ constexpr f64 tan_c14 = 0x4.a12565071a664p-12;
+ constexpr f64 tan_c16 = 0x4.dada3797ac1bcp-12;
+ constexpr f64 tan_c18 = -0x1.a74976b6ea3f3p-12;
+ constexpr f64 tan_c20 = 0x1.d06a5ae5e4a74p-12;
+
+ constexpr f64 cot_c2 = -0x5.5555555555554p-4; // cot_c*: hexadecimal-float coefficients passed for the inverse lanes
+ constexpr f64 cot_c4 = -0x5.b05b05b05b758p-8;
+ constexpr f64 cot_c6 = -0x8.ab355dffc79a8p-12;
+ constexpr f64 cot_c8 = -0xd.debbca405c9f8p-16;
+ constexpr f64 cot_c10 = -0x1.66a8edb99b15p-16;
+ constexpr f64 cot_c12 = -0x2.450239be0ee92p-20;
+ constexpr f64 cot_c14 = -0x3.ad6ddb4719438p-24;
+ constexpr f64 cot_c16 = -0x5.ff4c42741356p-28;
+ constexpr f64 cot_c18 = -0x9.06881bcdf3108p-32;
+ constexpr f64 cot_c20 = -0x1.644abedc113cap-32;
+
+ const vec<f64, N> x2 = x * x; // the polynomial variable is x squared
+ const vec<f64, N> val = trig_horner(x2, inverse, 1.0, 1.0, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, tan_c6,
+ cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12, cot_c14, tan_c14,
+ cot_c16, tan_c16, cot_c18, tan_c18, cot_c20, tan_c20); // NOTE(review): trig_horner is defined outside this chunk; presumably picks the cot_* or tan_* coefficient of each pair per lane from `inverse`
+
+ const vec<f64, N> z = select(inverse, val / -x, val * x); // inverse lanes: val / -x, other lanes: val * x
+ return mulsign(z, x_full); // NOTE(review): presumably transfers the sign of x_full onto z, since the fold worked on |x_full|
+}
+template <typename T>
+KFR_SINTRIN T tandeg(const T& x) // tan of an angle expressed in degrees
+{
+ return tan(x * c_degtorad<T>); // scale by c_degtorad<T> (presumably pi/180, defined outside this chunk), then call tan
+}
+
+KFR_HANDLE_SCALAR(tan) // NOTE(review): the sin_cos.hpp and sqrt.hpp hunks of this diff use KFR_HANDLE_SCALAR_1 at the same spot; confirm the un-suffixed variant is intended here
+KFR_FN(tan) // NOTE(review): macro defined outside this chunk; presumably declares fn_tan, which the public expression overload names as internal::fn_tan
+KFR_FN(tandeg)
+}
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> tan(const T1& x)
+KFR_INTRIN T1 tan(const T1& x)
{
- return internal::in_tan<>::tan(x);
+ return internal::tan(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_tan, E1> tan(E1&& x)
+KFR_INTRIN expr_func<internal::fn_tan, E1> tan(E1&& x) // expression overload: returns an expr_func wrapping x rather than a computed value
{
- return { fn_tan(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} stands in for a value-initialised internal::fn_tan; x is perfectly forwarded
}
-using fn_tandeg = internal::in_tan<>::fn_tandeg;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> tandeg(const T1& x)
+KFR_INTRIN T1 tandeg(const T1& x)
{
- return internal::in_tan<>::tandeg(x);
+ return internal::tandeg(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_tandeg, E1> tandeg(E1&& x)
+KFR_INTRIN expr_func<internal::fn_tandeg, E1> tandeg(E1&& x) // expression overload: returns an expr_func wrapping x rather than a computed value
{
- return { fn_tandeg(), std::forward<E1>(x) };
+ return { {}, std::forward<E1>(x) }; // {} stands in for a value-initialised internal::fn_tandeg; x is perfectly forwarded
}
}
-}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/dft/conv.hpp b/include/kfr/dft/conv.hpp
@@ -27,7 +27,6 @@
#include "../base/memory.hpp"
#include "../base/read_write.hpp"
#include "../base/vec.hpp"
-#include "../expressions/operators.hpp"
#include "fft.hpp"
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -215,8 +215,8 @@ KFR_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t size)
else
{
double kth = c_pi<double, 2> * (n / static_cast<double>(size));
- double tcos = +kfr::native::cos(kth);
- double tsin = -kfr::native::sin(kth);
+ double tcos = +kfr::cos(kth);
+ double tsin = -kfr::sin(kth);
return make_vector(static_cast<T>(tcos), static_cast<T>(tsin));
}
}
diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp
@@ -485,7 +485,7 @@ constexpr cvec<T, N> twiddleimagmask()
template <typename T, size_t N>
KFR_NOINLINE static vec<T, N> cossin_conj(vec<T, N> x)
{
- return cconj(in_sin_cos<cpu_t::native>::cossin(x));
+ return cconj(cossin(x)); // cossin is now found by unqualified lookup instead of through in_sin_cos<cpu_t::native>; cconj is applied to its result
}
template <size_t k, size_t size, bool inverse = false, typename T, size_t width>
diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp
@@ -26,7 +26,6 @@
#include "../base/sin_cos.hpp"
#include "../base/vec.hpp"
#include "../expressions/basic.hpp"
-#include "../expressions/operators.hpp"
#include "../expressions/reduce.hpp"
#include "window.hpp"
@@ -38,90 +37,72 @@ using fir_taps = univector<T, Size>;
namespace internal
{
-template <cpu_t cpu = cpu_t::native>
-struct in_fir : in_reduce<cpu>
+template <size_t tapcount, typename T, typename E1>
+struct expression_short_fir : expression<E1> // FIR expression with a compile-time tap count; taps and delay line are held in vec members
{
-private:
- using in_reduce<cpu>::dotproduct;
+ static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two");
-public:
- template <size_t tapcount, typename T, typename E1>
- struct expression_short_fir : expression<E1>
+ expression_short_fir(E1&& e1, const array_ref<T>& taps)
+ : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0) // delayline is initialised from 0
{
- static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two");
- template <cpu_t newcpu>
- using retarget_this =
- typename in_fir<newcpu>::template expression_short_fir<tapcount, T, retarget<E1, newcpu>>;
-
- expression_short_fir(E1&& e1, const array_ref<T>& taps)
- : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const
- {
- vec<T, N> in = cast<T>(this->argument_first(index, x));
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const // produces N output samples for position index
+ {
+ vec<T, N> in = cast<T>(this->argument_first(index, x)); // read N input samples from the wrapped expression, as T
- vec<T, N> out = in * taps[0];
- cfor(csize<1>, csize<tapcount>,
- [&](auto I) { out = out + concat_and_slice<tapcount - 1 - I, N>(delayline, in) * taps[I]; });
- delayline = concat_and_slice<N, tapcount - 1>(delayline, in);
+ vec<T, N> out = in * taps[0]; // contribution of tap 0
+ cfor(csize<1>, csize<tapcount>,
+ [&](auto I) { out = out + concat_and_slice<tapcount - 1 - I, N>(delayline, in) * taps[I]; }); // taps 1..tapcount-1: N-wide slice of (delayline, in) starting at tapcount - 1 - I
+ delayline = concat_and_slice<N, tapcount - 1>(delayline, in); // keep tapcount - 1 elements of (delayline, in), starting at offset N, for the next call
- return cast<U>(out);
- }
- vec<T, tapcount> taps;
- mutable vec<T, tapcount - 1> delayline;
- };
+ return cast<U>(out);
+ }
+ vec<T, tapcount> taps;
+ mutable vec<T, tapcount - 1> delayline; // mutable: updated from the const call operator
+};
- template <typename T, typename E1>
- struct expression_fir : expression<E1>
+template <typename T, typename E1>
+struct expression_fir : expression<E1> // FIR expression with a run-time tap count; taps and delay line are univector_dyn members
+{
+ expression_fir(E1&& e1, const array_ref<const T>& taps)
+ : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(taps.size(), T()), delayline_cursor(0) // delay line: taps.size() copies of T(), cursor at 0
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const // produces N output samples for position index
{
- template <cpu_t newcpu>
- using retarget_this = typename in_fir<newcpu>::template expression_fir<T, retarget<E1, newcpu>>;
+ const size_t tapcount = taps.size();
+ const vec<T, N> input = cast<T>(this->argument_first(index, x)); // read N input samples from the wrapped expression, as T
- expression_fir(E1&& e1, const array_ref<const T>& taps)
- : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(taps.size(), T()),
- delayline_cursor(0)
+ vec<T, N> output;
+ size_t cursor = delayline_cursor; // work on a local copy; stored back after the loop
+ KFR_LOOP_NOUNROLL
+ for (size_t i = 0; i < N; i++) // one sample per iteration
{
+ delayline.ringbuf_write(cursor, input[i]); // NOTE(review): ringbuf_write is defined elsewhere; presumably stores the sample and advances cursor
+ output(i) = dotproduct(taps, delayline.slice(cursor) /*, tapcount - cursor*/) +
+ dotproduct(taps.slice(tapcount - cursor), delayline /*, cursor*/); // sum of two dot products: taps against delayline from cursor on, and taps from tapcount - cursor on against the start of delayline
}
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const
- {
- const size_t tapcount = taps.size();
- const vec<T, N> input = cast<T>(this->argument_first(index, x));
-
- vec<T, N> output;
- size_t cursor = delayline_cursor;
- KFR_LOOP_NOUNROLL
- for (size_t i = 0; i < N; i++)
- {
- delayline.ringbuf_write(cursor, input[i]);
- output(i) = dotproduct(taps, delayline.slice(cursor) /*, tapcount - cursor*/) +
- dotproduct(taps.slice(tapcount - cursor), delayline /*, cursor*/);
- }
- delayline_cursor = cursor;
- return cast<U>(output);
- }
- univector_dyn<T> taps;
- mutable univector_dyn<T> delayline;
- mutable size_t delayline_cursor;
- };
+ delayline_cursor = cursor;
+ return cast<U>(output);
+ }
+ univector_dyn<T> taps;
+ mutable univector_dyn<T> delayline; // mutable: updated from the const call operator
+ mutable size_t delayline_cursor; // mutable: updated from the const call operator
};
}
-namespace native
-{
template <typename T, typename E1, size_t Tag>
-KFR_INLINE internal::in_fir<>::expression_fir<T, E1> fir(E1&& e1, const univector<T, Tag>& taps)
+KFR_INLINE internal::expression_fir<T, E1> fir(E1&& e1, const univector<T, Tag>& taps) // wraps expression e1 in an expression_fir that uses the given taps
{
- return internal::in_fir<>::expression_fir<T, E1>(std::forward<E1>(e1), taps.ref());
+ return internal::expression_fir<T, E1>(std::forward<E1>(e1), taps.ref()); // e1 is perfectly forwarded; taps are handed over through taps.ref()
}
template <typename T, size_t TapCount, typename E1>
-KFR_INLINE internal::in_fir<>::expression_short_fir<TapCount, T, E1> short_fir(
- E1&& e1, const univector<T, TapCount>& taps)
+KFR_INLINE internal::expression_short_fir<TapCount, T, E1> short_fir(E1&& e1,
+ const univector<T, TapCount>& taps) // wraps e1 in an expression_short_fir; expression_short_fir additionally static_asserts that the tap count is a power of two
{
static_assert(TapCount >= 1 && TapCount < 16, "Use short_fir only for small FIR filters");
- return internal::in_fir<>::expression_short_fir<TapCount, T, E1>(std::forward<E1>(e1), taps.ref());
-}
+ return internal::expression_short_fir<TapCount, T, E1>(std::forward<E1>(e1), taps.ref()); // e1 is perfectly forwarded; taps are handed over through taps.ref()
}
}
diff --git a/include/kfr/dsp/fir_design.hpp b/include/kfr/dsp/fir_design.hpp
@@ -24,218 +24,124 @@
#include "fir.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
namespace internal
{
-template <cpu_t cpu = cpu_t::native>
-struct in_fir_design : in_sqrt<cpu>,
- in_abs<cpu>,
- in_log_exp<cpu>,
- in_sin_cos<cpu>,
- in_window<cpu>,
- in_reduce<cpu>
+template <typename T>
+KFR_SINTRIN void fir_lowpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window,
+ bool normalize = true) // fills taps in place with a windowed-sinc low-pass design
{
-private:
- using in_sqrt<cpu>::sqrt;
- using in_abs<cpu>::abs;
- using in_log_exp<cpu>::log;
- using in_log_exp<cpu>::exp;
- using in_log_exp<cpu>::log_fmadd;
- using in_log_exp<cpu>::exp_fmadd;
- using in_log_exp<cpu>::exp10;
- using typename in_sin_cos<cpu>::fn_sinc;
- using in_reduce<cpu>::reduce;
- using in_reduce<cpu>::dotproduct;
- using in_reduce<cpu>::sum;
-
-public:
- template <typename T>
- KFR_SINTRIN void fir_lowpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window,
- bool normalize = true)
+ const T scale = 2.0 * cutoff;
+ taps = bind_expression(fn_sinc(),
+ symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, taps.size(), true)) *
+ scale * window; // sinc over a symmlinspace grid built from (taps.size() - 1) * cutoff * pi, times scale and the window
+
+ if (is_odd(taps.size()))
+ taps[taps.size() / 2] = scale; // odd length: the centre tap is overwritten with scale
+
+ if (normalize)
{
- const T scale = 2.0 * cutoff;
- taps = bind_expression(fn_sinc(), symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>,
- taps.size(), true)) *
- scale * window;
-
- if (is_odd(taps.size()))
- taps[taps.size() / 2] = scale;
-
- if (normalize)
- {
- const T invsum = reciprocal(sum(taps));
- taps = taps * invsum;
- }
+ const T invsum = reciprocal(sum(taps)); // 1 / sum(taps)
+ taps = taps * invsum; // rescale every tap by invsum
}
- template <typename T>
- KFR_SINTRIN void fir_highpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window,
- bool normalize = true)
+}
+template <typename T>
+KFR_SINTRIN void fir_highpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window,
+ bool normalize = true) // fills taps in place with a windowed-sinc high-pass design
+{
+ const T scale = 2.0 * -cutoff; // negated low-pass scale
+ taps = bind_expression(fn_sinc(),
+ symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, taps.size(), true)) *
+ scale * window; // sinc over a symmlinspace grid built from (taps.size() - 1) * cutoff * pi, times scale and the window
+
+ if (is_odd(taps.size()))
+ taps[taps.size() / 2] = 1 - 2.0 * cutoff; // odd length: the centre tap is overwritten with 1 - 2 * cutoff
+
+ if (normalize)
{
- const T scale = 2.0 * -cutoff;
- taps = bind_expression(fn_sinc(), symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>,
- taps.size(), true)) *
- scale * window;
-
- if (is_odd(taps.size()))
- taps[taps.size() / 2] = 1 - 2.0 * cutoff;
-
- if (normalize)
- {
- const T invsum = reciprocal(sum(taps) + 1);
- taps = taps * invsum;
- }
+ const T invsum = reciprocal(sum(taps) + 1); // 1 / (sum(taps) + 1); note the + 1, which fir_lowpass does not have
+ taps = taps * invsum; // rescale every tap by invsum
}
+}
+
+template <typename T>
+KFR_SINTRIN void fir_bandpass(univector_ref<T> taps, T frequency1, T frequency2,
+ const expression_pointer<T>& window, bool normalize = true) // fills taps in place with a windowed-sinc band-pass design
+{
+ const T scale1 = 2.0 * frequency1;
+ const T scale2 = 2.0 * frequency2;
+ const T sc = c_pi<T> * T(taps.size() - 1); // pi * (taps.size() - 1), shared by both grids
+ const T start1 = sc * frequency1;
+ const T start2 = sc * frequency2;
+
+ taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2 -
+ bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1) *
+ window; // (scaled sinc for frequency2 minus scaled sinc for frequency1), times the window
- template <typename T>
- KFR_SINTRIN void fir_bandpass(univector_ref<T> taps, T frequency1, T frequency2,
- const expression_pointer<T>& window, bool normalize = true)
+ if (is_odd(taps.size()))
+ taps[taps.size() / 2] = 2 * (frequency2 - frequency1); // odd length: the centre tap is overwritten
+
+ if (normalize)
{
- const T scale1 = 2.0 * frequency1;
- const T scale2 = 2.0 * frequency2;
- const T sc = c_pi<T> * T(taps.size() - 1);
- const T start1 = sc * frequency1;
- const T start2 = sc * frequency2;
-
- taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2 -
- bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1) *
- window;
-
- if (is_odd(taps.size()))
- taps[taps.size() / 2] = 2 * (frequency2 - frequency1);
-
- if (normalize)
- {
- const T invsum = reciprocal(sum(taps) + 1);
- taps = taps * invsum;
- }
+ const T invsum = reciprocal(sum(taps) + 1); // 1 / (sum(taps) + 1)
+ taps = taps * invsum; // rescale every tap by invsum
}
+}
+
+template <typename T>
+KFR_SINTRIN void fir_bandstop(univector_ref<T> taps, T frequency1, T frequency2,
+ const expression_pointer<T>& window, bool normalize = true) // fills taps in place with a windowed-sinc band-stop design
+{
+ const T scale1 = 2.0 * frequency1;
+ const T scale2 = 2.0 * frequency2;
+ const T sc = c_pi<T> * T(taps.size() - 1); // pi * (taps.size() - 1), shared by both grids
+ const T start1 = sc * frequency1;
+ const T start2 = sc * frequency2;
+
+ taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1 -
+ bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2) *
+ window; // (scaled sinc for frequency1 minus scaled sinc for frequency2), times the window
+
+ if (is_odd(taps.size()))
+ taps[taps.size() / 2] = 1 - 2 * (frequency2 - frequency1); // odd length: the centre tap is overwritten
- template <typename T>
- KFR_SINTRIN void fir_bandstop(univector_ref<T> taps, T frequency1, T frequency2,
- const expression_pointer<T>& window, bool normalize = true)
+ if (normalize)
{
- const T scale1 = 2.0 * frequency1;
- const T scale2 = 2.0 * frequency2;
- const T sc = c_pi<T> * T(taps.size() - 1);
- const T start1 = sc * frequency1;
- const T start2 = sc * frequency2;
-
- taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1 -
- bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2) *
- window;
-
- if (is_odd(taps.size()))
- taps[taps.size() / 2] = 1 - 2 * (frequency2 - frequency1);
-
- if (normalize)
- {
- const T invsum = reciprocal(sum(taps));
- taps = taps * invsum;
- }
+ const T invsum = reciprocal(sum(taps)); // 1 / sum(taps)
+ taps = taps * invsum; // rescale every tap by invsum
}
+}
- template <size_t tapcount, typename T, typename E1>
- struct expression_short_fir : expression<E1>
- {
- static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two");
- template <cpu_t newcpu>
- using retarget_this =
- typename in_fir<newcpu>::template expression_short_fir<tapcount, T, retarget<E1, newcpu>>;
-
- expression_short_fir(E1&& e1, const array_ref<T>& taps)
- : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const
- {
- vec<T, N> in = cast<T>(this->argument_first(index, x));
-
- vec<T, N> out = in * taps[0];
- cfor(csize<1>, csize<tapcount>,
- [&](auto I) { out = out + concat_and_slice<tapcount - 1 - I, N>(delayline, in) * taps[I]; });
- delayline = concat_and_slice<N, tapcount - 1>(delayline, in);
-
- return cast<U>(out);
- }
- vec<T, tapcount> taps;
- mutable vec<T, tapcount - 1> delayline;
- };
-
- template <typename T, typename E1>
- struct expression_fir : expression<E1>
- {
- template <cpu_t newcpu>
- using retarget_this = typename in_fir<newcpu>::template expression_fir<T, retarget<E1, newcpu>>;
-
- expression_fir(E1&& e1, const array_ref<const T>& taps)
- : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(taps.size(), T()),
- delayline_cursor(0)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const
- {
- const size_t tapcount = taps.size();
- const vec<T, N> input = cast<T>(this->argument_first(index, x));
-
- vec<T, N> output;
- size_t cursor = delayline_cursor;
- KFR_LOOP_NOUNROLL
- for (size_t i = 0; i < N; i++)
- {
- delayline.ringbuf_write(cursor, input[i]);
- output(i) = dotproduct(taps, delayline.slice(cursor) /*, tapcount - cursor*/) +
- dotproduct(taps.slice(tapcount - cursor), delayline /*, cursor*/);
- }
- delayline_cursor = cursor;
- return cast<U>(output);
- }
- univector_dyn<T> taps;
- mutable univector_dyn<T> delayline;
- mutable size_t delayline_cursor;
- };
- KFR_SPEC_FN(in_fir_design, fir_lowpass)
- KFR_SPEC_FN(in_fir_design, fir_highpass)
- KFR_SPEC_FN(in_fir_design, fir_bandpass)
- KFR_SPEC_FN(in_fir_design, fir_bandstop)
-};
+KFR_FN(fir_lowpass) // NOTE(review): macro defined outside this chunk; presumably declares a functor type for each of the four design functions
+KFR_FN(fir_highpass)
+KFR_FN(fir_bandpass)
+KFR_FN(fir_bandstop)
}
-namespace native
-{
template <typename T, size_t Tag>
KFR_INLINE void fir_lowpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window,
bool normalize = true)
{
- return internal::in_fir_design<>::fir_lowpass(taps.slice(), cutoff, window, normalize);
+ return internal::fir_lowpass(taps.slice(), cutoff, window, normalize); // public entry point: passes taps.slice() and the remaining arguments to internal::fir_lowpass
}
template <typename T, size_t Tag>
KFR_INLINE void fir_highpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window,
bool normalize = true)
{
- return internal::in_fir_design<>::fir_highpass(taps.slice(), cutoff, window, normalize);
+ return internal::fir_highpass(taps.slice(), cutoff, window, normalize); // public entry point: passes taps.slice() and the remaining arguments to internal::fir_highpass
}
template <typename T, size_t Tag>
KFR_INLINE void fir_bandpass(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2,
const expression_pointer<T>& window, bool normalize = true)
{
- return internal::in_fir_design<>::fir_bandpass(taps.slice(), frequency1, frequency2, window, normalize);
+ return internal::fir_bandpass(taps.slice(), frequency1, frequency2, window, normalize); // public entry point: passes taps.slice() and the remaining arguments to internal::fir_bandpass
}
template <typename T, size_t Tag>
KFR_INLINE void fir_bandstop(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2,
const expression_pointer<T>& window, bool normalize = true)
{
- return internal::in_fir_design<>::fir_bandstop(taps.slice(), frequency1, frequency2, window, normalize);
-}
+ return internal::fir_bandstop(taps.slice(), frequency1, frequency2, window, normalize); // public entry point: passes taps.slice() and the remaining arguments to internal::fir_bandstop
}
}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/fracdelay.hpp b/include/kfr/dsp/fracdelay.hpp
@@ -27,18 +27,14 @@
namespace kfr
{
-namespace native
-{
-
template <typename T, typename E1>
-KFR_INLINE internal::in_fir<>::expression_short_fir<2, T, E1> fracdelay(E1&& e1, T delay)
+KFR_INLINE internal::expression_short_fir<2, T, E1> fracdelay(E1&& e1, T delay) // two-tap short FIR with taps { 1 - delay, delay }; delay < 0 becomes 0 and delay > 1 is replaced by fract(delay)
{
if (delay < 0)
delay = 0;
if (delay > 1)
delay = fract(delay);
univector<T, 2> taps({ 1 - delay, delay });
- return internal::in_fir<>::expression_short_fir<2, T, E1>(std::forward<E1>(e1), taps.ref());
-}
+ return internal::expression_short_fir<2, T, E1>(std::forward<E1>(e1), taps.ref()); // e1 is perfectly forwarded; the local taps are handed over through taps.ref()
}
}
diff --git a/include/kfr/dsp/oscillators.hpp b/include/kfr/dsp/oscillators.hpp
@@ -23,28 +23,20 @@
#pragma once
#include "../base/sin_cos.hpp"
-#include "../base/vec.hpp"
#include "../expressions/basic.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
template <typename T>
auto jaehne(T magn, size_t size)
{
- using namespace native;
return typed<T>(magn * sin(c_pi<T, 1, 2> * sqr(linspace(T(0), T(size), size, false)) / size), size);
}
template <typename T>
auto swept(T magn, size_t size)
{
- using namespace native;
return typed<T>(
magn * sin(c_pi<T, 1, 4> * sqr(sqr(linspace(T(0), T(size), size, false)) / sqr(T(size))) * T(size)),
size);
@@ -52,277 +44,235 @@ auto swept(T magn, size_t size)
namespace internal
{
-template <cpu_t c = cpu_t::native, cpu_t cc = c>
-struct in_oscillators : in_sin_cos<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
+template <typename T>
+KFR_SINTRIN T rawsine(T x)
{
-private:
- using in_sin_cos<cc>::fastsin;
- using in_sin_cos<cc>::sin;
- using in_select<cc>::select;
- using in_round<cc>::fract;
- using in_abs<cc>::abs;
-
-public:
- template <typename T>
- KFR_SINTRIN T rawsine(T x)
- {
- return fastsin(x * c_pi<T, 2>);
- }
- template <typename T>
- KFR_SINTRIN T sinenorm(T x)
- {
- return rawsine(fract(x));
- }
- template <typename T>
- KFR_SINTRIN T sine(T x)
- {
- return sinenorm(c_recip_pi<T, 1, 2> * x);
- }
+ return fastsin(x * c_pi<T, 2>);
+}
+template <typename T>
+KFR_SINTRIN T sinenorm(T x)
+{
+ return rawsine(fract(x));
+}
+template <typename T>
+KFR_SINTRIN T sine(T x)
+{
+ return sinenorm(c_recip_pi<T, 1, 2> * x);
+}
- template <typename T>
- KFR_SINTRIN T rawsquare(T x)
- {
- return select(x < T(0.5), T(1), -T(1));
- }
- template <typename T>
- KFR_SINTRIN T squarenorm(T x)
- {
- return rawsquare(fract(x));
- }
- template <typename T>
- KFR_SINTRIN T square(T x)
- {
- return squarenorm(c_recip_pi<T, 1, 2> * x);
- }
+template <typename T>
+KFR_SINTRIN T rawsquare(T x)
+{
+ return select(x < T(0.5), T(1), -T(1));
+}
+template <typename T>
+KFR_SINTRIN T squarenorm(T x)
+{
+ return rawsquare(fract(x));
+}
+template <typename T>
+KFR_SINTRIN T square(T x)
+{
+ return squarenorm(c_recip_pi<T, 1, 2> * x);
+}
- template <typename T>
- KFR_SINTRIN T rawsawtooth(T x)
- {
- return T(1) - 2 * x;
- }
- template <typename T>
- KFR_SINTRIN T sawtoothnorm(T x)
- {
- return rawsawtooth(fract(x));
- }
- template <typename T>
- KFR_SINTRIN T sawtooth(T x)
- {
- return sawtoothnorm(c_recip_pi<T, 1, 2> * x);
- }
+template <typename T>
+KFR_SINTRIN T rawsawtooth(T x)
+{
+ return T(1) - 2 * x;
+}
+template <typename T>
+KFR_SINTRIN T sawtoothnorm(T x)
+{
+ return rawsawtooth(fract(x));
+}
+template <typename T>
+KFR_SINTRIN T sawtooth(T x)
+{
+ return sawtoothnorm(c_recip_pi<T, 1, 2> * x);
+}
- template <typename T>
- KFR_SINTRIN T isawtoothnorm(T x)
- {
- return T(-1) + 2 * fract(x + 0.5);
- }
- template <typename T>
- KFR_SINTRIN T isawtooth(T x)
- {
- return isawtoothnorm(c_recip_pi<T, 1, 2> * x);
- }
+template <typename T>
+KFR_SINTRIN T isawtoothnorm(T x)
+{
+ return T(-1) + 2 * fract(x + 0.5);
+}
+template <typename T>
+KFR_SINTRIN T isawtooth(T x)
+{
+ return isawtoothnorm(c_recip_pi<T, 1, 2> * x);
+}
- template <typename T>
- KFR_SINTRIN T rawtriangle(T x)
- {
- return 1 - abs(4 * x - 2);
- }
- template <typename T>
- KFR_SINTRIN T trianglenorm(T x)
- {
- return rawtriangle(fract(x + 0.25));
- }
- template <typename T>
- KFR_SINTRIN T triangle(T x)
- {
- return trianglenorm(c_recip_pi<T, 1, 2> * x);
- }
+template <typename T>
+KFR_SINTRIN T rawtriangle(T x)
+{
+ return 1 - abs(4 * x - 2);
+}
+template <typename T>
+KFR_SINTRIN T trianglenorm(T x)
+{
+ return rawtriangle(fract(x + 0.25));
+}
+template <typename T>
+KFR_SINTRIN T triangle(T x)
+{
+ return trianglenorm(c_recip_pi<T, 1, 2> * x);
+}
- KFR_SPEC_FN(in_oscillators, rawsine)
- KFR_SPEC_FN(in_oscillators, sine)
- KFR_SPEC_FN(in_oscillators, sinenorm)
- KFR_SPEC_FN(in_oscillators, rawsquare)
- KFR_SPEC_FN(in_oscillators, square)
- KFR_SPEC_FN(in_oscillators, squarenorm)
- KFR_SPEC_FN(in_oscillators, rawtriangle)
- KFR_SPEC_FN(in_oscillators, triangle)
- KFR_SPEC_FN(in_oscillators, trianglenorm)
- KFR_SPEC_FN(in_oscillators, rawsawtooth)
- KFR_SPEC_FN(in_oscillators, sawtooth)
- KFR_SPEC_FN(in_oscillators, sawtoothnorm)
- KFR_SPEC_FN(in_oscillators, isawtooth)
- KFR_SPEC_FN(in_oscillators, isawtoothnorm)
-};
+KFR_FN(rawsine)
+KFR_FN(sine)
+KFR_FN(sinenorm)
+KFR_FN(rawsquare)
+KFR_FN(square)
+KFR_FN(squarenorm)
+KFR_FN(rawtriangle)
+KFR_FN(triangle)
+KFR_FN(trianglenorm)
+KFR_FN(rawsawtooth)
+KFR_FN(sawtooth)
+KFR_FN(sawtoothnorm)
+KFR_FN(isawtooth)
+KFR_FN(isawtoothnorm)
}
-using fn_rawsine = internal::in_oscillators<>::fn_rawsine;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> rawsine(const T1& x)
+KFR_INTRIN T1 rawsine(const T1& x)
{
- return internal::in_oscillators<>::rawsine(x);
+ return internal::rawsine(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_rawsine, E1> rawsine(E1&& x)
+KFR_INTRIN expr_func<internal::fn_rawsine, E1> rawsine(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_sine = internal::in_oscillators<>::fn_sine;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sine(const T1& x)
+KFR_INTRIN T1 sine(const T1& x)
{
- return internal::in_oscillators<>::sine(x);
+ return internal::sine(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sine, E1> sine(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sine, E1> sine(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_sinenorm = internal::in_oscillators<>::fn_sinenorm;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sinenorm(const T1& x)
+KFR_INTRIN T1 sinenorm(const T1& x)
{
- return internal::in_oscillators<>::sinenorm(x);
+ return internal::sinenorm(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sinenorm, E1> sinenorm(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sinenorm, E1> sinenorm(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_rawsquare = internal::in_oscillators<>::fn_rawsquare;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> rawsquare(const T1& x)
+KFR_INTRIN T1 rawsquare(const T1& x)
{
- return internal::in_oscillators<>::rawsquare(x);
+ return internal::rawsquare(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_rawsquare, E1> rawsquare(E1&& x)
+KFR_INTRIN expr_func<internal::fn_rawsquare, E1> rawsquare(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_square = internal::in_oscillators<>::fn_square;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> square(const T1& x)
+KFR_INTRIN T1 square(const T1& x)
{
- return internal::in_oscillators<>::square(x);
+ return internal::square(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_square, E1> square(E1&& x)
+KFR_INTRIN expr_func<internal::fn_square, E1> square(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_squarenorm = internal::in_oscillators<>::fn_squarenorm;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> squarenorm(const T1& x)
+KFR_INTRIN T1 squarenorm(const T1& x)
{
- return internal::in_oscillators<>::squarenorm(x);
+ return internal::squarenorm(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_squarenorm, E1> squarenorm(E1&& x)
+KFR_INTRIN expr_func<internal::fn_squarenorm, E1> squarenorm(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_rawtriangle = internal::in_oscillators<>::fn_rawtriangle;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> rawtriangle(const T1& x)
+KFR_INTRIN T1 rawtriangle(const T1& x)
{
- return internal::in_oscillators<>::rawtriangle(x);
+ return internal::rawtriangle(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_rawtriangle, E1> rawtriangle(E1&& x)
+KFR_INTRIN expr_func<internal::fn_rawtriangle, E1> rawtriangle(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_triangle = internal::in_oscillators<>::fn_triangle;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> triangle(const T1& x)
+KFR_INTRIN T1 triangle(const T1& x)
{
- return internal::in_oscillators<>::triangle(x);
+ return internal::triangle(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_triangle, E1> triangle(E1&& x)
+KFR_INTRIN expr_func<internal::fn_triangle, E1> triangle(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_trianglenorm = internal::in_oscillators<>::fn_trianglenorm;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> trianglenorm(const T1& x)
+KFR_INTRIN T1 trianglenorm(const T1& x)
{
- return internal::in_oscillators<>::trianglenorm(x);
+ return internal::trianglenorm(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_trianglenorm, E1> trianglenorm(E1&& x)
+KFR_INTRIN expr_func<internal::fn_trianglenorm, E1> trianglenorm(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_rawsawtooth = internal::in_oscillators<>::fn_rawsawtooth;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> rawsawtooth(const T1& x)
+KFR_INTRIN T1 rawsawtooth(const T1& x)
{
- return internal::in_oscillators<>::rawsawtooth(x);
+ return internal::rawsawtooth(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_rawsawtooth, E1> rawsawtooth(E1&& x)
+KFR_INTRIN expr_func<internal::fn_rawsawtooth, E1> rawsawtooth(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_sawtooth = internal::in_oscillators<>::fn_sawtooth;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sawtooth(const T1& x)
+KFR_INTRIN T1 sawtooth(const T1& x)
{
- return internal::in_oscillators<>::sawtooth(x);
+ return internal::sawtooth(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sawtooth, E1> sawtooth(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sawtooth, E1> sawtooth(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_sawtoothnorm = internal::in_oscillators<>::fn_sawtoothnorm;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sawtoothnorm(const T1& x)
+KFR_INTRIN T1 sawtoothnorm(const T1& x)
{
- return internal::in_oscillators<>::sawtoothnorm(x);
+ return internal::sawtoothnorm(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_sawtoothnorm, E1> sawtoothnorm(E1&& x)
+KFR_INTRIN expr_func<internal::fn_sawtoothnorm, E1> sawtoothnorm(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_isawtooth = internal::in_oscillators<>::fn_isawtooth;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> isawtooth(const T1& x)
+KFR_INTRIN T1 isawtooth(const T1& x)
{
- return internal::in_oscillators<>::isawtooth(x);
+ return internal::isawtooth(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_isawtooth, E1> isawtooth(E1&& x)
+KFR_INTRIN expr_func<internal::fn_isawtooth, E1> isawtooth(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_isawtoothnorm = internal::in_oscillators<>::fn_isawtoothnorm;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> isawtoothnorm(const T1& x)
+KFR_INTRIN T1 isawtoothnorm(const T1& x)
{
- return internal::in_oscillators<>::isawtoothnorm(x);
+ return internal::isawtoothnorm(x);
}
-
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_isawtoothnorm, E1> isawtoothnorm(E1&& x)
+KFR_INTRIN expr_func<internal::fn_isawtoothnorm, E1> isawtoothnorm(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/resample.hpp b/include/kfr/dsp/resample.hpp
@@ -28,11 +28,6 @@
#include "../expressions/reduce.hpp"
#include "window.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
namespace resample_quality
@@ -45,200 +40,174 @@ constexpr csize_t<10> high{};
namespace internal
{
-template <cpu_t cc = cpu_t::native>
-struct in_resampling : in_sqrt<cc>, in_abs<cc>, in_log_exp<cc>, in_sin_cos<cc>, in_window<cc>, in_reduce<cc>
+template <typename T1, typename T2>
+KFR_SINTRIN T1 resample_blackman(T1 n, T2 a)
{
-private:
- using in_sqrt<cc>::sqrt;
- using in_abs<cc>::abs;
- using in_log_exp<cc>::log;
- using in_log_exp<cc>::exp;
- using in_log_exp<cc>::log_fmadd;
- using in_log_exp<cc>::exp_fmadd;
- using in_log_exp<cc>::exp10;
- using in_sin_cos<cc>::cos;
- using in_sin_cos<cc>::sinc;
- using in_reduce<cc>::dotproduct;
- using in_reduce<cc>::sum;
-
-public:
- template <typename T1, typename T2>
- static inline T1 blackman(T1 n, T2 a)
- {
- const T1 a0 = (1 - a) * 0.5;
- const T1 a1 = 0.5;
- const T1 a2 = a * 0.5;
- n = n * c_pi<T1, 2>;
- return a0 - a1 * cos(n) + a2 * cos(2 * n);
- }
+ const T1 a0 = (1 - a) * 0.5;
+ const T1 a1 = 0.5;
+ const T1 a2 = a * 0.5;
+ n = n * c_pi<T1, 2>;
+ return a0 - a1 * cos(n) + a2 * cos(2 * n);
+}
- template <typename T, size_t quality>
- struct resampler
- {
- template <cpu_t newcpu>
- using retarget_this = typename in_resampling<newcpu>::template resampler<T, quality>;
+template <typename T, size_t quality>
+struct resampler
+{
+ using itype = i64;
- using itype = i64;
+ constexpr static itype depth = static_cast<itype>(1 << (quality + 1));
- constexpr static itype depth = static_cast<itype>(1 << (quality + 1));
+ resampler(itype interpolation_factor, itype decimation_factor, T scale = T(1), T cutoff = 0.49)
+ : input_position(0), output_position(0)
+ {
+ const i64 gcf = gcd(interpolation_factor, decimation_factor);
+ interpolation_factor /= gcf;
+ decimation_factor /= gcf;
- resampler(itype interpolation_factor, itype decimation_factor, T scale = T(1), T cutoff = 0.49)
- : input_position(0), output_position(0)
- {
- const i64 gcf = gcd(interpolation_factor, decimation_factor);
- interpolation_factor /= gcf;
- decimation_factor /= gcf;
+ taps = depth * interpolation_factor;
+ order = size_t(depth * interpolation_factor - 1);
- taps = depth * interpolation_factor;
- order = size_t(depth * interpolation_factor - 1);
+ this->interpolation_factor = interpolation_factor;
+ this->decimation_factor = decimation_factor;
- this->interpolation_factor = interpolation_factor;
- this->decimation_factor = decimation_factor;
+ const itype halftaps = taps / 2;
+ filter = univector<T>(size_t(taps), T());
+ delay = univector<T>(size_t(depth), T());
- const itype halftaps = taps / 2;
- filter = univector<T>(size_t(taps), T());
- delay = univector<T>(size_t(depth), T());
+ cutoff = cutoff / std::max(decimation_factor, interpolation_factor);
- cutoff = cutoff / std::max(decimation_factor, interpolation_factor);
+ for (itype j = 0, jj = 0; j < taps; j++)
+ {
+ filter[size_t(j)] = scale * 2 * interpolation_factor * cutoff *
+ sinc((jj - halftaps) * cutoff * c_pi<T, 2>) *
+ resample_blackman(T(jj) / T(taps - 1), T(0.16));
+ jj += size_t(interpolation_factor);
+ if (jj >= taps)
+ jj = jj - taps + 1;
+ }
- for (itype j = 0, jj = 0; j < taps; j++)
- {
- filter[size_t(j)] = scale * 2 * interpolation_factor * cutoff *
- sinc((jj - halftaps) * cutoff * c_pi<T, 2>) *
- blackman(T(jj) / T(taps - 1), T(0.16));
- jj += size_t(interpolation_factor);
- if (jj >= taps)
- jj = jj - taps + 1;
- }
+ const T s = reciprocal(sum(filter)) * interpolation_factor;
+ filter = filter * s;
+ }
+ KFR_INLINE size_t operator()(T* dest, size_t zerosize)
+ {
+ size_t outputsize = 0;
+ const itype srcsize = itype(zerosize);
- const T s = reciprocal(sum(filter)) * interpolation_factor;
- filter = filter * s;
- }
- KFR_INLINE size_t operator()(T* dest, size_t zerosize)
+ for (size_t i = 0;; i++)
{
- size_t outputsize = 0;
- const itype srcsize = itype(zerosize);
-
- for (size_t i = 0;; i++)
+ const itype ii = itype(i) + output_position;
+ const itype workindex = ii * (decimation_factor);
+ const itype workindex_rem = workindex % (interpolation_factor);
+ const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0;
+ itype srcindex = workindex / (interpolation_factor);
+ srcindex = workindex_rem ? srcindex + 1 : srcindex;
+ const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth));
+ srcindex = srcindex - (depth - 1);
+
+ if (srcindex + depth >= input_position + srcsize)
+ break;
+ outputsize++;
+
+ if (dest)
{
- const itype ii = itype(i) + output_position;
- const itype workindex = ii * (decimation_factor);
- const itype workindex_rem = workindex % (interpolation_factor);
- const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0;
- itype srcindex = workindex / (interpolation_factor);
- srcindex = workindex_rem ? srcindex + 1 : srcindex;
- const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth));
- srcindex = srcindex - (depth - 1);
-
- if (srcindex + depth >= input_position + srcsize)
- break;
- outputsize++;
-
- if (dest)
+ if (srcindex >= input_position)
{
- if (srcindex >= input_position)
- {
- dest[i] = T(0);
- }
- else
- {
- const itype prev_count = input_position - srcindex;
- dest[i] = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr);
- }
+ dest[i] = T(0);
+ }
+ else
+ {
+ const itype prev_count = input_position - srcindex;
+ dest[i] = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr);
}
}
- if (srcsize >= depth)
- {
- delay = zeros();
- }
- else
- {
- delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize));
- delay.slice(size_t(depth - srcsize)) = zeros();
- }
-
- input_position += srcsize;
- output_position += outputsize;
- return outputsize;
}
- KFR_INLINE size_t operator()(T* dest, univector_ref<const T> src)
+ if (srcsize >= depth)
{
- size_t outputsize = 0;
- const itype srcsize = itype(src.size());
+ delay = zeros();
+ }
+ else
+ {
+ delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize));
+ delay.slice(size_t(depth - srcsize)) = zeros();
+ }
+
+ input_position += srcsize;
+ output_position += outputsize;
+ return outputsize;
+ }
+ KFR_INLINE size_t operator()(T* dest, univector_ref<const T> src)
+ {
+ size_t outputsize = 0;
+ const itype srcsize = itype(src.size());
- for (size_t i = 0;; i++)
+ for (size_t i = 0;; i++)
+ {
+ const itype ii = itype(i) + output_position;
+ const itype workindex = ii * (decimation_factor);
+ const itype workindex_rem = workindex % (interpolation_factor);
+ const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0;
+ itype srcindex = workindex / (interpolation_factor);
+ srcindex = workindex_rem ? srcindex + 1 : srcindex;
+ const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth));
+ srcindex = srcindex - (depth - 1);
+
+ if (srcindex + depth >= input_position + srcsize)
+ break;
+ outputsize++;
+
+ if (dest)
{
- const itype ii = itype(i) + output_position;
- const itype workindex = ii * (decimation_factor);
- const itype workindex_rem = workindex % (interpolation_factor);
- const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0;
- itype srcindex = workindex / (interpolation_factor);
- srcindex = workindex_rem ? srcindex + 1 : srcindex;
- const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth));
- srcindex = srcindex - (depth - 1);
-
- if (srcindex + depth >= input_position + srcsize)
- break;
- outputsize++;
-
- if (dest)
+ if (srcindex >= input_position)
{
- if (srcindex >= input_position)
- {
- dest[i] = dotproduct(src.slice(size_t(srcindex - input_position), size_t(depth)),
- tap_ptr /*, depth*/);
- }
- else
- {
- const itype prev_count = input_position - srcindex;
- dest[i] =
- dotproduct(delay.slice(size_t(depth - prev_count)),
- tap_ptr /*, size_t(prev_count)*/) +
- dotproduct(src, tap_ptr.slice(
- size_t(prev_count),
- size_t(depth - prev_count)) /*, size_t(depth - prev_count)*/);
- }
+ dest[i] = dotproduct(src.slice(size_t(srcindex - input_position), size_t(depth)),
+ tap_ptr /*, depth*/);
+ }
+ else
+ {
+ const itype prev_count = input_position - srcindex;
+ dest[i] =
+ dotproduct(delay.slice(size_t(depth - prev_count)),
+ tap_ptr /*, size_t(prev_count)*/) +
+ dotproduct(
+ src, tap_ptr.slice(size_t(prev_count),
+ size_t(depth - prev_count)) /*, size_t(depth - prev_count)*/);
}
}
- if (srcsize >= depth)
- {
- delay = src.slice(size_t(srcsize - depth));
- }
- else
- {
- delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize));
- delay.slice(size_t(depth - srcsize)) = src;
- }
-
- input_position += srcsize;
- output_position += outputsize;
- return outputsize;
}
- itype taps;
- size_t order;
- itype interpolation_factor;
- itype decimation_factor;
- univector<T> filter;
- univector<T> delay;
- itype input_position;
- itype output_position;
- };
+ if (srcsize >= depth)
+ {
+ delay = src.slice(size_t(srcsize - depth));
+ }
+ else
+ {
+ delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize));
+ delay.slice(size_t(depth - srcsize)) = src;
+ }
+
+ input_position += srcsize;
+ output_position += outputsize;
+ return outputsize;
+ }
+ itype taps;
+ size_t order;
+ itype interpolation_factor;
+ itype decimation_factor;
+ univector<T> filter;
+ univector<T> delay;
+ itype input_position;
+ itype output_position;
};
}
-namespace native
-{
template <typename T, size_t quality>
-inline internal::in_resampling<>::resampler<T, quality> resampler(csize_t<quality>,
+inline internal::resampler<T, quality> resampler(csize_t<quality>,
size_t interpolation_factor,
size_t decimation_factor, T scale = T(1),
T cutoff = 0.49)
{
- using itype = typename internal::in_resampling<>::resampler<T, quality>::itype;
- return internal::in_resampling<>::resampler<T, quality>(itype(interpolation_factor),
+ using itype = typename internal::resampler<T, quality>::itype;
+ return internal::resampler<T, quality>(itype(interpolation_factor),
itype(decimation_factor), scale, cutoff);
}
}
-}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp
@@ -31,11 +31,6 @@
#include "../base/vec.hpp"
#include "../expressions/basic.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
@@ -43,177 +38,159 @@ using sample_rate_t = double;
namespace internal
{
-template <cpu_t c = cpu_t::native, cpu_t cc = c>
-struct in_dsp_units : in_log_exp<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
-{
-private:
- using in_log_exp<cc>::log;
- using in_log_exp<cc>::exp;
- using in_log_exp<cc>::log10;
- using in_log_exp<cc>::exp10;
- using in_log_exp<cc>::exp_fmadd;
- using in_log_exp<cc>::log_fmadd;
- using in_select<cc>::select;
- using in_round<cc>::fract;
- using in_abs<cc>::abs;
-
-public:
- template <typename T, typename TF = ftype<T>>
- KFR_SINTRIN TF amp_to_dB(T amp)
- {
- return log(cast<subtype<TF>>(amp)) * subtype<TF>(8.6858896380650365530225783783322);
- // return T( 20.0 ) * log10( level );
- }
-
- template <typename T, typename TF = ftype<T>>
- KFR_SINTRIN TF dB_to_amp(T dB)
- {
- return exp(dB * subtype<TF>(0.11512925464970228420089957273422));
- // return exp10( dB / 20 );
- }
-
- template <typename T, typename TF = ftype<T>>
- KFR_SINTRIN TF amp_to_dB(T amp, T offset)
- {
- return log_fmadd(amp, subtype<TF>(8.6858896380650365530225783783322), offset);
- // return T( 20.0 ) * log10( level );
- }
-
- template <typename T, typename TF = ftype<T>>
- KFR_SINTRIN TF dB_to_amp(T dB, T offset)
- {
- auto offs = -subtype<TF>(0.11512925464970228420089957273422) * offset;
- return exp_fmadd(dB, subtype<TF>(0.11512925464970228420089957273422), offs);
- // return exp10( dB / 20 );
- }
-
- template <typename T>
- KFR_SINTRIN T power_to_dB(T x)
- {
- return log(x) * (10 * c_recip_log_10<T>);
- }
-
- template <typename T>
- KFR_SINTRIN T dB_to_power(T x)
- {
- if (x == -c_infinity<T>)
- return 0.0;
- else
- return exp(x * (c_log_10<T> / 10.0));
- }
-
- template <typename T, typename TF = ftype<T>>
- KFR_SINTRIN TF note_to_hertz(T note)
- {
- const subtype<TF> offset = 2.1011784386926213177653145771814;
-
- return exp_fmadd(note, subtype<TF>(0.05776226504666210911810267678818), offset);
- }
-
- template <typename T, typename TF = ftype<T>>
- KFR_SINTRIN TF hertz_to_note(T hertz)
- {
- const subtype<TF> offset = -36.376316562295915248836189714583;
-
- return log_fmadd(hertz, subtype<TF>(17.312340490667560888319096172023), offset);
- }
-
- template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>>
- KFR_SINTRIN Tc note_to_hertz(T1 note, T2 tunenote, T3 tunehertz)
- {
- const Tc offset = log(tunehertz) - tunenote * subtype<Tc>(0.05776226504666210911810267678818);
-
- return exp_fmadd(note, subtype<Tc>(0.05776226504666210911810267678818), offset);
- }
-
- template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>>
- KFR_SINTRIN Tc hertz_to_note(T1 hertz, T2 tunenote, T3 tunehertz)
- {
- const Tc offset = tunenote - log(tunehertz) * subtype<Tc>(17.312340490667560888319096172023);
-
- return log_fmadd(hertz, subtype<Tc>(17.312340490667560888319096172023), offset);
- }
-
- KFR_SPEC_FN(in_dsp_units, note_to_hertz)
- KFR_SPEC_FN(in_dsp_units, hertz_to_note)
- KFR_SPEC_FN(in_dsp_units, amp_to_dB)
- KFR_SPEC_FN(in_dsp_units, dB_to_amp)
- KFR_SPEC_FN(in_dsp_units, power_to_dB)
- KFR_SPEC_FN(in_dsp_units, dB_to_power)
-};
-}
-
-using fn_note_to_hertz = internal::in_dsp_units<>::fn_note_to_hertz;
+template <typename T, typename TF = ftype<T>>
+KFR_SINTRIN TF amp_to_dB(T amp)
+{
+ return log(cast<subtype<TF>>(amp)) * subtype<TF>(8.6858896380650365530225783783322);
+ // return T( 20.0 ) * log10( level );
+}
+
+template <typename T, typename TF = ftype<T>>
+KFR_SINTRIN TF dB_to_amp(T dB)
+{
+ return exp(dB * subtype<TF>(0.11512925464970228420089957273422));
+ // return exp10( dB / 20 );
+}
+
+template <typename T, typename TF = ftype<T>>
+KFR_SINTRIN TF amp_to_dB(T amp, T offset)
+{
+ return log_fmadd(amp, subtype<TF>(8.6858896380650365530225783783322), offset);
+ // i.e. 20 * log10(amp) + offset (8.6858... == 20 / ln(10)), assuming log_fmadd(x, m, a) == log(x) * m + a -- TODO confirm
+}
+
+template <typename T, typename TF = ftype<T>>
+KFR_SINTRIN TF dB_to_amp(T dB, T offset)
+{
+ auto offs = -subtype<TF>(0.11512925464970228420089957273422) * offset;
+ return exp_fmadd(dB, subtype<TF>(0.11512925464970228420089957273422), offs);
+ // return exp10( dB / 20 );
+}
+
+template <typename T>
+KFR_SINTRIN T power_to_dB(T x)
+{
+ return log(x) * (10 * c_recip_log_10<T>);
+}
+
+template <typename T>
+KFR_SINTRIN T dB_to_power(T x)
+{
+ if (x == -c_infinity<T>)
+ return 0.0;
+ else
+ return exp(x * (c_log_10<T> / 10.0));
+}
+
+template <typename T, typename TF = ftype<T>>
+KFR_SINTRIN TF note_to_hertz(T note)
+{
+ const subtype<TF> offset = 2.1011784386926213177653145771814;
+
+ return exp_fmadd(note, subtype<TF>(0.05776226504666210911810267678818), offset);
+}
+
+template <typename T, typename TF = ftype<T>>
+KFR_SINTRIN TF hertz_to_note(T hertz)
+{
+ const subtype<TF> offset = -36.376316562295915248836189714583;
+
+ return log_fmadd(hertz, subtype<TF>(17.312340490667560888319096172023), offset);
+}
+
+template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>>
+KFR_SINTRIN Tc note_to_hertz(T1 note, T2 tunenote, T3 tunehertz)
+{
+ const Tc offset = log(tunehertz) - tunenote * subtype<Tc>(0.05776226504666210911810267678818);
+
+ return exp_fmadd(note, subtype<Tc>(0.05776226504666210911810267678818), offset);
+}
+
+template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>>
+KFR_SINTRIN Tc hertz_to_note(T1 hertz, T2 tunenote, T3 tunehertz)
+{
+ const Tc offset = tunenote - log(tunehertz) * subtype<Tc>(17.312340490667560888319096172023);
+
+ return log_fmadd(hertz, subtype<Tc>(17.312340490667560888319096172023), offset);
+}
+
+KFR_I_FN(note_to_hertz)
+KFR_I_FN(hertz_to_note)
+KFR_I_FN(amp_to_dB)
+KFR_I_FN(dB_to_amp)
+KFR_I_FN(power_to_dB)
+KFR_I_FN(dB_to_power)
+}
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> note_to_hertz(const T1& x)
{
- return internal::in_dsp_units<>::note_to_hertz(x);
+ return internal::note_to_hertz(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_note_to_hertz, E1> note_to_hertz(E1&& x)
+KFR_INTRIN expr_func<internal::fn_note_to_hertz, E1> note_to_hertz(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_hertz_to_note = internal::in_dsp_units<>::fn_hertz_to_note;
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> hertz_to_note(const T1& x)
{
- return internal::in_dsp_units<>::hertz_to_note(x);
+ return internal::hertz_to_note(x);
}
+
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_hertz_to_note, E1> hertz_to_note(E1&& x)
+KFR_INTRIN expr_func<internal::fn_hertz_to_note, E1> hertz_to_note(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_amp_to_dB = internal::in_dsp_units<>::fn_amp_to_dB;
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> amp_to_dB(const T1& x)
{
- return internal::in_dsp_units<>::amp_to_dB(x);
+ return internal::amp_to_dB(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_amp_to_dB, E1> amp_to_dB(E1&& x)
+KFR_INTRIN expr_func<internal::fn_amp_to_dB, E1> amp_to_dB(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_dB_to_amp = internal::in_dsp_units<>::fn_dB_to_amp;
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> dB_to_amp(const T1& x)
{
- return internal::in_dsp_units<>::dB_to_amp(x);
+ return internal::dB_to_amp(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_dB_to_amp, E1> dB_to_amp(E1&& x)
+KFR_INTRIN expr_func<internal::fn_dB_to_amp, E1> dB_to_amp(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_power_to_dB = internal::in_dsp_units<>::fn_power_to_dB;
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> power_to_dB(const T1& x)
+KFR_INTRIN T1 power_to_dB(const T1& x)
{
- return internal::in_dsp_units<>::power_to_dB(x);
+ return internal::power_to_dB(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_power_to_dB, E1> power_to_dB(E1&& x)
+KFR_INTRIN expr_func<internal::fn_power_to_dB, E1> power_to_dB(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
-using fn_dB_to_power = internal::in_dsp_units<>::fn_dB_to_power;
+
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> dB_to_power(const T1& x)
+KFR_INTRIN T1 dB_to_power(const T1& x)
{
- return internal::in_dsp_units<>::dB_to_power(x);
+ return internal::dB_to_power(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_INTRIN expr_func<fn_dB_to_power, E1> dB_to_power(E1&& x)
+KFR_INTRIN expr_func<internal::fn_dB_to_power, E1> dB_to_power(E1&& x)
{
return { {}, std::forward<E1>(x) };
}
}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/weighting.hpp b/include/kfr/dsp/weighting.hpp
@@ -31,92 +31,83 @@ namespace kfr
namespace internal
{
-template <cpu_t c = cpu_t::native, cpu_t cc = c>
-struct in_weight : in_sqrt<cc>, in_dsp_units<cc>
+template <typename T>
+KFR_SINTRIN T weight_a_unnorm(T f)
{
-private:
- using in_dsp_units<cc>::amp_to_dB;
-
-public:
- template <typename T>
- KFR_SINTRIN T weight_a_unnorm(T f)
- {
- const T f2 = pow2(f);
- const T nom = pow2(12200) * pow4(f);
- const T den =
- (f2 + pow2(20.6)) * (sqrt((f2 + pow2(107.7)) * (f2 + pow2(737.9)))) * (f2 + pow2(12200));
- return nom / den;
- }
+ const T f2 = pow2(f);
+ const T nom = pow2(12200) * pow4(f);
+ const T den = (f2 + pow2(20.6)) * (sqrt((f2 + pow2(107.7)) * (f2 + pow2(737.9)))) * (f2 + pow2(12200));
+ return nom / den;
+}
- template <typename T>
- constexpr static T weight_a_gain = reciprocal(weight_a_unnorm(T(1000.0)));
+template <typename T>
+constexpr static T weight_a_gain = reciprocal(weight_a_unnorm(T(1000.0)));
- template <typename T>
- KFR_SINTRIN T aweighting(T f)
- {
- return weight_a_unnorm(f) * weight_a_gain<subtype<T>>;
- }
+template <typename T>
+KFR_SINTRIN T aweighting(T f)
+{
+ return weight_a_unnorm(f) * weight_a_gain<subtype<T>>;
+}
- template <typename T>
- KFR_SINTRIN T weight_b_unnorm(T f)
- {
- const T f2 = pow2(f);
- const T nom = pow2(12200) * pow3(f);
- const T den = (f2 + pow2(20.6)) * (sqrt((f2 + pow2(158.5)))) * (f2 + pow2(12200));
+template <typename T>
+KFR_SINTRIN T weight_b_unnorm(T f)
+{
+ const T f2 = pow2(f);
+ const T nom = pow2(12200) * pow3(f);
+ const T den = (f2 + pow2(20.6)) * (sqrt((f2 + pow2(158.5)))) * (f2 + pow2(12200));
- return nom / den;
- }
+ return nom / den;
+}
- template <typename T>
- constexpr static T weight_b_gain = reciprocal(weight_b_unnorm(T(1000.0)));
+template <typename T>
+constexpr static T weight_b_gain = reciprocal(weight_b_unnorm(T(1000.0)));
- template <typename T>
- KFR_SINTRIN T bweighting(T f)
- {
- return weight_b_unnorm(f) * weight_b_gain<subtype<T>>;
- }
+template <typename T>
+KFR_SINTRIN T bweighting(T f)
+{
+ return weight_b_unnorm(f) * weight_b_gain<subtype<T>>;
+}
- template <typename T>
- KFR_SINTRIN T weight_c_unnorm(T f)
- {
- const T f2 = pow2(f);
- const T nom = pow2(12200) * f2;
- const T den = (f2 + pow2(20.6)) * (f2 + pow2(12200));
+template <typename T>
+KFR_SINTRIN T weight_c_unnorm(T f)
+{
+ const T f2 = pow2(f);
+ const T nom = pow2(12200) * f2;
+ const T den = (f2 + pow2(20.6)) * (f2 + pow2(12200));
- return nom / den;
- }
+ return nom / den;
+}
- template <typename T>
- constexpr static T weight_c_gain = reciprocal(weight_c_unnorm(T(1000.0)));
+template <typename T>
+constexpr static T weight_c_gain = reciprocal(weight_c_unnorm(T(1000.0)));
- template <typename T>
- KFR_SINTRIN T cweighting(T f)
- {
- return weight_c_unnorm(f) * weight_c_gain<subtype<T>>;
- }
+template <typename T>
+KFR_SINTRIN T cweighting(T f)
+{
+ return weight_c_unnorm(f) * weight_c_gain<subtype<T>>;
+}
- template <typename T>
- KFR_SINTRIN T aweightingdB(T f)
- {
- return amp_to_dB(aweighting(f));
- }
- template <typename T>
- KFR_SINTRIN T bweightingdB(T f)
- {
- return amp_to_dB(bweighting(f));
- }
- template <typename T>
- KFR_SINTRIN T cweightingdB(T f)
- {
- return amp_to_dB(cweighting(f));
- }
+template <typename T>
+KFR_SINTRIN T aweightingdB(T f)
+{
+ return amp_to_dB(aweighting(f));
+}
+template <typename T>
+KFR_SINTRIN T bweightingdB(T f)
+{
+ return amp_to_dB(bweighting(f));
+}
+template <typename T>
+KFR_SINTRIN T cweightingdB(T f)
+{
+ return amp_to_dB(cweighting(f));
+}
- KFR_SPEC_FN(in_weight, aweighting)
- KFR_SPEC_FN(in_weight, bweighting)
- KFR_SPEC_FN(in_weight, cweighting)
- KFR_SPEC_FN(in_weight, aweightingdB)
- KFR_SPEC_FN(in_weight, bweightingdB)
- KFR_SPEC_FN(in_weight, cweightingdB)
-};
+KFR_FN(aweighting)
+KFR_FN(bweighting)
+KFR_FN(cweighting)
+KFR_FN(aweightingdB)
+KFR_FN(bweightingdB)
+KFR_FN(cweightingdB)
}
}
diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp
@@ -23,16 +23,12 @@
#pragma once
#include "../base/log_exp.hpp"
+#include "../base/modzerobessel.hpp"
#include "../base/sin_cos.hpp"
#include "../base/sqrt.hpp"
#include "../base/vec.hpp"
#include "../expressions/pointer.hpp"
-#pragma clang diagnostic push
-#if CID_HAS_WARNING("-Winaccessible-base")
-#pragma clang diagnostic ignored "-Winaccessible-base"
-#endif
-
namespace kfr
{
@@ -70,482 +66,406 @@ namespace internal
{
template <typename T>
-constexpr T bessel_coef[] = { T(0.25),
- T(0.027777777777777776236),
- T(0.0017361111111111110147),
- T(6.9444444444444444384e-005),
- T(1.9290123456790123911e-006),
- T(3.9367598891408417495e-008),
- T(6.1511873267825652335e-010),
- T(7.5940584281266239246e-012),
- T(7.5940584281266233693e-014),
- T(6.2760813455591932909e-016),
- T(4.3583898233049949985e-018),
- T(2.5789288895295827557e-020),
- T(1.3157800456783586208e-022),
- T(5.8479113141260384983e-025),
- T(2.2843403570804837884e-027),
- T(7.904291893012054025e-030),
- T(2.4395962632753252792e-032),
- T(6.75788438580422547e-035),
- T(1.689471096451056426e-037),
- T(3.8310002187098784929e-040),
- T(7.9152897080782616517e-043),
- T(1.4962740468957016443e-045),
- T(2.5976979980828152196e-048),
- T(4.1563167969325041577e-051),
- T(6.1483976285983795968e-054),
- T(8.434015951438105991e-057),
- T(1.0757673407446563809e-059),
- T(1.2791526049282476926e-062),
- T(1.4212806721424974034e-065),
- T(1.4789601166935457918e-068),
- T(1.4442969889585408123e-071),
- T(1.3262598613026086927e-074),
- T(1.1472836170437790782e-077),
- T(9.3655805472961564331e-081),
- T(7.2265282000741942594e-084),
- T(5.2786911614858977913e-087),
- T(3.6556032974279072401e-090),
- T(2.4034209713529963119e-093),
- T(1.5021381070956226783e-096) };
-
-template <typename T, size_t N>
-KFR_INLINE vec<T, N> modzerobessel(vec<T, N> x)
-{
- const vec<T, N> x_2 = x * 0.5;
- const vec<T, N> x_2_sqr = x_2 * x_2;
- vec<T, N> num = x_2_sqr;
- vec<T, N> result;
- result = 1 + x_2_sqr;
-
- KFR_LOOP_UNROLL
- for (size_t i = 0; i < (sizeof(T) == 4 ? 20 : 39); i++)
- {
- result = fmadd((num *= x_2_sqr), bessel_coef<T>[i], result);
- }
- return result;
-}
+struct window_linspace_0_1 : expression_linspace<T>
+{
+ window_linspace_0_1(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(0, 1, size, symmetry == window_symmetry::symmetric)
+ {
+ }
+};
+
+template <typename T>
+struct window_linspace_m1_1 : expression_linspace<T>
+{
+ window_linspace_m1_1(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(-1, 1, size, symmetry == window_symmetry::symmetric)
+ {
+ }
+};
+
+template <typename T>
+struct window_linspace_mpi_pi : expression_linspace<T>
+{
+ window_linspace_mpi_pi(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(-c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric)
+ {
+ }
+};
+
+template <typename T>
+struct window_linspace_m1_1_trunc : expression_linspace<T>
+{
+ window_linspace_m1_1_trunc(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(-T(size - 1) / size, T(size - 1) / size, size,
+ symmetry == window_symmetry::symmetric)
+ {
+ }
+};
+
+template <typename T>
+struct window_linspace_m1_1_trunc2 : expression_linspace<T>
+{
+ window_linspace_m1_1_trunc2(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(symmetric_linspace,
+ (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size), size,
+ symmetry == window_symmetry::symmetric)
+ {
+ }
+};
-template <cpu_t cpu = cpu_t::native>
-struct in_window : in_sin_cos<cpu>, in_log_exp<cpu>, in_select<cpu>, in_sqrt<cpu>, in_abs<cpu>
+template <typename T>
+struct expression_rectangular : input_expression
{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_rectangular<T>;
+ expression_rectangular(size_t size, T = T(), window_symmetry = window_symmetry::symmetric) : m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ using UI = utype<U>;
+ const vec<UI, N> i = enumerate(vec<UI, N>()) + cast<UI>(index);
+ return select(i < cast<UI>(m_size), U(1), U(0));
+ }
+ size_t size() const { return m_size; }
+
private:
- using in_sin_cos<cpu>::sin;
- using in_sin_cos<cpu>::cos;
- using in_sin_cos<cpu>::sinc;
- using in_log_exp<cpu>::exp;
- using in_select<cpu>::select;
- using in_sqrt<cpu>::sqrt;
- using in_abs<cpu>::abs;
-
-public:
- template <typename T>
- struct window_linspace_0_1 : expression_linspace<T>
- {
- window_linspace_0_1(size_t size, window_symmetry symmetry)
- : expression_linspace<T>(0, 1, size, symmetry == window_symmetry::symmetric)
- {
- }
- };
+ size_t m_size;
+};
- template <typename T>
- struct window_linspace_m1_1 : expression_linspace<T>
+template <typename T>
+struct expression_triangular : input_expression
+{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_triangular<T>;
+ expression_triangular(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
{
- window_linspace_m1_1(size_t size, window_symmetry symmetry)
- : expression_linspace<T>(-1, 1, size, symmetry == window_symmetry::symmetric)
- {
- }
- };
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(1 - abs(linspace(cinput, index, y)));
+ }
+ size_t size() const { return m_size; }
- template <typename T>
- struct window_linspace_mpi_pi : expression_linspace<T>
+private:
+ window_linspace_m1_1_trunc2<T> linspace;
+ size_t m_size;
+};
+
+template <typename T>
+struct expression_bartlett : input_expression
+{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_bartlett<T>;
+ expression_bartlett(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
{
- window_linspace_mpi_pi(size_t size, window_symmetry symmetry)
- : expression_linspace<T>(-c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric)
- {
- }
- };
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(1 - abs(linspace(cinput, index, y)));
+ }
+ size_t size() const { return m_size; }
- template <typename T>
- struct window_linspace_m1_1_trunc : expression_linspace<T>
+private:
+ window_linspace_m1_1<T> linspace;
+ size_t m_size;
+};
+
+template <typename T>
+struct expression_cosine : input_expression
+{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_cosine<T>;
+ expression_cosine(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
{
- window_linspace_m1_1_trunc(size_t size, window_symmetry symmetry)
- : expression_linspace<T>(-T(size - 1) / size, T(size - 1) / size, size,
- symmetry == window_symmetry::symmetric)
- {
- }
- };
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(sin(c_pi<T> * linspace(cinput, index, y)));
+ }
+ size_t size() const { return m_size; }
+
+private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+};
- template <typename T>
- struct window_linspace_m1_1_trunc2 : expression_linspace<T>
+template <typename T>
+struct expression_hann : input_expression
+{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_hann<T>;
+ expression_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
{
- window_linspace_m1_1_trunc2(size_t size, window_symmetry symmetry)
- : expression_linspace<T>(symmetric_linspace,
- (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size), size,
- symmetry == window_symmetry::symmetric)
- {
- }
- };
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y))));
+ }
+ size_t size() const { return m_size; }
- template <typename T>
- struct expression_rectangular : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_rectangular<T>;
- expression_rectangular(size_t size, T = T(), window_symmetry = window_symmetry::symmetric)
- : m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- using UI = utype<U>;
- const vec<UI, N> i = enumerate(vec<UI, N>()) + cast<UI>(index);
- return select(i < cast<UI>(m_size), U(1), U(0));
- }
- size_t size() const { return m_size; }
-
- private:
- size_t m_size;
- };
+private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+};
- template <typename T>
- struct expression_triangular : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_triangular<T>;
- expression_triangular(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- return cast<U>(1 - abs(linspace(cinput, index, y)));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_m1_1_trunc2<T> linspace;
- size_t m_size;
- };
+template <typename T>
+struct expression_bartlett_hann : input_expression
+{
+ using value_type = T;
- template <typename T>
- struct expression_bartlett : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_bartlett<T>;
- expression_bartlett(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- return cast<U>(1 - abs(linspace(cinput, index, y)));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_m1_1<T> linspace;
- size_t m_size;
- };
+ template <cpu_t newcpu>
+ using retarget_this = expression_bartlett_hann<T>;
- template <typename T>
- struct expression_cosine : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_cosine<T>;
- expression_cosine(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- return cast<U>(sin(c_pi<T> * linspace(cinput, index, y)));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_0_1<T> linspace;
- size_t m_size;
- };
+ expression_bartlett_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ const vec<T, N> xx = linspace(cinput, index, y);
+ return cast<U>(T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5))));
+ }
+ size_t size() const { return m_size; }
- template <typename T>
- struct expression_hann : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_hann<T>;
- expression_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- return cast<U>(T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y))));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_0_1<T> linspace;
- size_t m_size;
- };
+private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+};
- template <typename T>
- struct expression_bartlett_hann : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_bartlett_hann<T>;
-
- expression_bartlett_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- const vec<T, N> xx = linspace(cinput, index, y);
- return cast<U>(T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5))));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_0_1<T> linspace;
- size_t m_size;
- };
+template <typename T>
+struct expression_hamming : input_expression
+{
+ using value_type = T;
- template <typename T>
- struct expression_hamming : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_hamming<T>;
- expression_hamming(size_t size, T alpha = 0.54, window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), alpha(alpha), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- return cast<U>(alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y))));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_0_1<T> linspace;
- T alpha;
- size_t m_size;
- };
+ template <cpu_t newcpu>
+ using retarget_this = expression_hamming<T>;
+ expression_hamming(size_t size, T alpha = 0.54, window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), alpha(alpha), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y))));
+ }
+ size_t size() const { return m_size; }
- template <typename T>
- struct expression_bohman : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_bohman<T>;
- expression_bohman(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- const vec<U, N> n = abs(linspace(cinput, index, y));
- return cast<U>((T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_m1_1<T> linspace;
- size_t m_size;
- };
+private:
+ window_linspace_0_1<T> linspace;
+ T alpha;
+ size_t m_size;
+};
- template <typename T>
- struct expression_blackman : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_blackman<T>;
- expression_blackman(size_t size, T alpha = 0.16,
- window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- const vec<T, N> n = linspace(cinput, index, y);
- return cast<U>(a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_0_1<T> linspace;
- T a0, a1, a2;
- size_t m_size;
- };
+template <typename T>
+struct expression_bohman : input_expression
+{
+ using value_type = T;
- template <typename T>
- struct expression_blackman_harris : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_blackman_harris<T>;
- expression_blackman_harris(size_t size, T = T(),
- window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>;
-
- return cast<U>(T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) -
- T(0.01168) * cos(3 * n));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_0_1<T> linspace;
- size_t m_size;
- };
+ template <cpu_t newcpu>
+ using retarget_this = expression_bohman<T>;
+ expression_bohman(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ const vec<U, N> n = abs(linspace(cinput, index, y));
+ return cast<U>((T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n));
+ }
+ size_t size() const { return m_size; }
- template <typename T>
- struct expression_kaiser : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_kaiser<T>;
- expression_kaiser(size_t size, T beta = 0.5, window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), beta(beta), m(reciprocal(modzerobessel(make_vector(beta))[0])),
- m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- return cast<U>(modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m);
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_m1_1<T> linspace;
- T beta;
- T m;
- size_t m_size;
- };
+private:
+ window_linspace_m1_1<T> linspace;
+ size_t m_size;
+};
- template <typename T>
- struct expression_flattop : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_flattop<T>;
- expression_flattop(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>;
- constexpr T a0 = 1;
- constexpr T a1 = 1.93;
- constexpr T a2 = 1.29;
- constexpr T a3 = 0.388;
- constexpr T a4 = 0.028;
- return cast<U>(a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_0_1<T> linspace;
- size_t m_size;
- };
+template <typename T>
+struct expression_blackman : input_expression
+{
+ using value_type = T;
- template <typename T>
- struct expression_gaussian : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_gaussian<T>;
-
- expression_gaussian(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), alpha(alpha), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- return cast<U>(exp(-0.5 * sqr(alpha * linspace(cinput, index, y))));
- }
-
- size_t size() const { return m_size; }
- private:
- window_linspace_m1_1_trunc<T> linspace;
- T alpha;
- size_t m_size;
- };
+ template <cpu_t newcpu>
+ using retarget_this = expression_blackman<T>;
+ expression_blackman(size_t size, T alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ const vec<T, N> n = linspace(cinput, index, y);
+ return cast<U>(a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n));
+ }
+ size_t size() const { return m_size; }
- template <typename T>
- struct expression_lanczos : input_expression
- {
- using value_type = T;
-
- template <cpu_t newcpu>
- using retarget_this = typename in_window<newcpu>::template expression_lanczos<T>;
- expression_lanczos(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
- : linspace(size, symmetry), alpha(alpha), m_size(size)
- {
- }
- template <typename U, size_t N>
- KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
- {
- constexpr vec_t<T, N> y{};
- return cast<U>(sinc(linspace(cinput, index, y)));
- }
- size_t size() const { return m_size; }
-
- private:
- window_linspace_mpi_pi<T> linspace;
- T alpha;
- size_t m_size;
- };
+private:
+ window_linspace_0_1<T> linspace;
+ T a0, a1, a2;
+ size_t m_size;
+};
+
+template <typename T>
+struct expression_blackman_harris : input_expression
+{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_blackman_harris<T>;
+ expression_blackman_harris(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>;
+
+ return cast<U>(T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n));
+ }
+ size_t size() const { return m_size; }
+
+private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+};
+
+template <typename T>
+struct expression_kaiser : input_expression
+{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_kaiser<T>;
+ expression_kaiser(size_t size, T beta = 0.5, window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), beta(beta), m(reciprocal(modzerobessel(make_vector(beta))[0])),
+ m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m);
+ }
+ size_t size() const { return m_size; }
+
+private:
+ window_linspace_m1_1<T> linspace;
+ T beta;
+ T m;
+ size_t m_size;
+};
+
+template <typename T>
+struct expression_flattop : input_expression
+{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_flattop<T>;
+ expression_flattop(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>;
+ constexpr T a0 = 1;
+ constexpr T a1 = 1.93;
+ constexpr T a2 = 1.29;
+ constexpr T a3 = 0.388;
+ constexpr T a4 = 0.028;
+ return cast<U>(a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n));
+ }
+ size_t size() const { return m_size; }
+
+private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+};
+
+template <typename T>
+struct expression_gaussian : input_expression
+{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_gaussian<T>;
+
+ expression_gaussian(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), alpha(alpha), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(exp(-0.5 * sqr(alpha * linspace(cinput, index, y))));
+ }
+
+ size_t size() const { return m_size; }
+private:
+ window_linspace_m1_1_trunc<T> linspace;
+ T alpha;
+ size_t m_size;
+};
+
+template <typename T>
+struct expression_lanczos : input_expression
+{
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_lanczos<T>;
+ expression_lanczos(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), alpha(alpha), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(sinc(linspace(cinput, index, y)));
+ }
+ size_t size() const { return m_size; }
+
+private:
+ window_linspace_mpi_pi<T> linspace;
+ T alpha;
+ size_t m_size;
};
template <window_type>
@@ -556,7 +476,7 @@ struct window_by_type;
struct window_by_type<window_type::win> \
{ \
template <typename T> \
- using type = in_window<>::expression_##win<T>; \
+ using type = expression_##win<T>; \
};
KFR_WINDOW_BY_TYPE(rectangular)
KFR_WINDOW_BY_TYPE(triangular)
@@ -574,83 +494,80 @@ KFR_WINDOW_BY_TYPE(gaussian)
KFR_WINDOW_BY_TYPE(lanczos)
}
-KFR_INLINE internal::in_window<>::expression_rectangular<fbase> window_rectangular(size_t size)
+KFR_INLINE internal::expression_rectangular<fbase> window_rectangular(size_t size)
{
- return internal::in_window<>::expression_rectangular<fbase>(size, fbase());
+ return internal::expression_rectangular<fbase>(size, fbase());
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_triangular<T> window_triangular(size_t size,
- ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_triangular<T> window_triangular(size_t size, ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_triangular<T>(size);
+ return internal::expression_triangular<T>(size);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_bartlett<T> window_bartlett(size_t size,
- ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_bartlett<T> window_bartlett(size_t size, ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_bartlett<T>(size);
+ return internal::expression_bartlett<T>(size);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_cosine<T>(size);
+ return internal::expression_cosine<T>(size);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_hann<T>(size);
+ return internal::expression_hann<T>(size);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_bartlett_hann<T> window_bartlett_hann(size_t size,
- ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_bartlett_hann<T> window_bartlett_hann(size_t size, ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_bartlett_hann<T>(size);
+ return internal::expression_bartlett_hann<T>(size);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_hamming<T> window_hamming(size_t size, T alpha = 0.54,
- ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_hamming<T> window_hamming(size_t size, T alpha = 0.54,
+ ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_hamming<T>(size, alpha);
+ return internal::expression_hamming<T>(size, alpha);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_bohman<T>(size);
+ return internal::expression_bohman<T>(size);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_blackman<T> window_blackman(
+KFR_INLINE internal::expression_blackman<T> window_blackman(
size_t size, T alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric,
ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_blackman<T>(size, alpha, symmetry);
+ return internal::expression_blackman<T>(size, alpha, symmetry);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_blackman_harris<T> window_blackman_harris(
+KFR_INLINE internal::expression_blackman_harris<T> window_blackman_harris(
size_t size, window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_blackman_harris<T>(size, T(), symmetry);
+ return internal::expression_blackman_harris<T>(size, T(), symmetry);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_kaiser<T> window_kaiser(size_t size, T beta = T(0.5),
- ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_kaiser<T> window_kaiser(size_t size, T beta = T(0.5),
+ ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_kaiser<T>(size, beta);
+ return internal::expression_kaiser<T>(size, beta);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_flattop<T>(size);
+ return internal::expression_flattop<T>(size);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_gaussian<T> window_gaussian(size_t size, T alpha = 2.5,
- ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_gaussian<T> window_gaussian(size_t size, T alpha = 2.5,
+ ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_gaussian<T>(size, alpha);
+ return internal::expression_gaussian<T>(size, alpha);
}
template <typename T = fbase>
-KFR_INLINE internal::in_window<>::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_INLINE internal::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>())
{
- return internal::in_window<>::expression_lanczos<T>(size);
+ return internal::expression_lanczos<T>(size);
}
template <typename T = fbase, window_type type,
@@ -681,5 +598,3 @@ KFR_NOINLINE expression_pointer<T> window(size_t size, window_type type, T win_p
fn_returns<expression_pointer<T>>());
}
}
-
-#pragma clang diagnostic pop
diff --git a/include/kfr/expressions/reduce.hpp b/include/kfr/expressions/reduce.hpp
@@ -34,192 +34,99 @@ namespace kfr
template <typename T>
KFR_INLINE T final_mean(T value, size_t size)
{
- return value / size;
+ return value / T(size);
}
KFR_FN(final_mean)
template <typename T>
KFR_INLINE T final_rootmean(T value, size_t size)
{
- return internal::builtin_sqrt(value / size);
+ return internal::builtin_sqrt(value / T(size));
}
KFR_FN(final_rootmean)
namespace internal
{
-template <typename FinalFn, typename T, KFR_ENABLE_IF(is_callable<FinalFn, size_t, T>::value)>
+template <typename FinalFn, typename T, KFR_ENABLE_IF(is_callable<FinalFn, T, size_t>::value)>
KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t size, T value)
{
return finalfn(value, size);
}
-template <typename FinalFn, typename T, KFR_ENABLE_IF(!is_callable<FinalFn, size_t, T>::value)>
+template <typename FinalFn, typename T, KFR_ENABLE_IF(!is_callable<FinalFn, T, size_t>::value)>
KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t, T value)
{
return finalfn(value);
}
-template <cpu_t cpu = cpu_t::native>
-struct in_reduce
+template <typename T, typename ReduceFn, typename TransformFn, typename FinalFn, cpu_t cpu = cpu_t::native>
+struct expression_reduce : output_expression
{
+ constexpr static size_t width = vector_width<T, cpu> * bitness_const(1, 2);
- template <typename T, typename ReduceFn, typename TransformFn, typename FinalFn>
- struct expression_reduce : output_expression
+ expression_reduce(ReduceFn&& reducefn, TransformFn&& transformfn, FinalFn&& finalfn)
+ : counter(0), reducefn(std::move(reducefn)), transformfn(std::move(transformfn)),
+ finalfn(std::move(finalfn)), value(resize<width>(make_vector(reducefn(initialvalue<T>{}))))
{
- using Tsubtype = subtype<T>;
- constexpr static size_t width = vector_width<Tsubtype, cpu> * bitness_const(1, 2);
-
- expression_reduce(ReduceFn&& reducefn, TransformFn&& transformfn, FinalFn&& finalfn)
- : counter(0), reducefn(std::move(reducefn)), transformfn(std::move(transformfn)),
- finalfn(std::move(finalfn)), value(resize<width>(make_vector(reducefn(initialvalue<T>{}))))
- {
- }
-
- template <typename U, size_t N>
- KFR_INLINE void operator()(coutput_t, size_t, vec<U, N> x) const
- {
- counter += N;
- process(x);
- }
-
- KFR_INLINE T get()
- {
- return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn));
- }
-
- protected:
- void reset() { counter = 0; }
- template <size_t N, KFR_ENABLE_IF(N == width)>
- KFR_INLINE void process(vec<Tsubtype, N> x) const
- {
- value = reducefn(transformfn(x), value);
- }
-
- template <size_t N, KFR_ENABLE_IF(N < width)>
- KFR_INLINE void process(vec<Tsubtype, N> x) const
- {
- value = combine(value, reducefn(transformfn(x), narrow<N>(value)));
- }
-
- template <size_t N, KFR_ENABLE_IF(N > width)>
- KFR_INLINE void process(vec<Tsubtype, N> x) const
- {
- process(low(x));
- process(high(x));
- }
-
- mutable size_t counter;
- retarget<ReduceFn, cpu> reducefn;
- retarget<TransformFn, cpu> transformfn;
- retarget<FinalFn, cpu> finalfn;
- mutable vec<Tsubtype, width> value;
- };
-
- template <typename ReduceFn, typename TransformFn = fn_pass_through, typename FinalFn = fn_pass_through,
- typename E1, typename T = value_type_of<E1>>
- KFR_SINTRIN T reduce(E1&& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn_pass_through(),
- FinalFn&& finalfn = fn_pass_through())
- {
- static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- const size_t size = e1.size();
- using reducer_t = expression_reduce<T, decay<ReduceFn>, decay<TransformFn>, decay<FinalFn>>;
- reducer_t red(std::forward<ReduceFn>(reducefn), std::forward<TransformFn>(transformfn),
- std::forward<FinalFn>(finalfn));
- process<T, cpu>(red, std::forward<E1>(e1), size);
-
- return red.get();
- }
-
- template <typename E1, typename T = value_type_of<E1>>
- KFR_SINTRIN T sum(E1&& x)
- {
- static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return reduce(std::forward<E1>(x), fn_add());
}
- template <typename E1, typename T = value_type_of<E1>>
- KFR_SINTRIN T mean(E1&& x)
+ template <typename U, size_t N>
+ KFR_INLINE void operator()(coutput_t, size_t, vec<U, N> x) const
{
- static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return reduce(std::forward<E1>(x), fn_add(), fn_pass_through(), fn_final_mean());
+ counter += N;
+ process(x);
}
- template <typename E1, typename T = value_type_of<E1>>
- KFR_SINTRIN T min(E1&& x)
- {
- using fn_min = typename in_min_max<cpu>::fn_min;
- static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return reduce(std::forward<E1>(x), fn_min());
- }
+ KFR_INLINE T get() { return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn)); }
- template <typename E1, typename T = value_type_of<E1>>
- KFR_SINTRIN T max(E1&& x)
- {
- using fn_max = typename in_min_max<cpu>::fn_max;
- static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return reduce(std::forward<E1>(x), fn_max());
- }
+protected:
+ void reset() { counter = 0; }
+ KFR_INLINE void process(vec<T, width> x) const { value = reducefn(transformfn(x), value); }
- template <typename E1, typename E2,
- typename T = value_type_of<decltype(std::declval<E1>() * std::declval<E2>())>>
- KFR_SINTRIN T dotproduct(E1&& x, E2&& y)
+ template <size_t N, KFR_ENABLE_IF(N < width)>
+ KFR_INLINE void process(vec<T, N> x) const
{
- auto m = std::forward<E1>(x) * std::forward<E2>(y);
- using E12 = decltype(m);
- static_assert(!is_generic<E12>::value, "e1 * e2 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E12>::value, "e1 * e2 must be a sized expression (use typed<T>())");
- return reduce(std::move(m), fn_add());
+ value = combine(value, reducefn(transformfn(x), narrow<N>(value)));
}
- template <typename E1, typename T = value_type_of<E1>>
- KFR_SINTRIN T rms(E1&& x)
+ template <size_t N, KFR_ENABLE_IF(N > width)>
+ KFR_INLINE void process(vec<T, N> x) const
{
- static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return reduce(std::forward<E1>(x), fn_add(), fn_sqr(), fn_final_rootmean());
+ process(low(x));
+ process(high(x));
}
- template <typename E1, typename T = value_type_of<E1>>
- KFR_SINTRIN T sumsqr(E1&& x)
- {
- static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return reduce(std::forward<E1>(x), fn_add(), fn_sqr());
- }
+ mutable size_t counter;
+ retarget<ReduceFn, cpu> reducefn;
+ retarget<TransformFn, cpu> transformfn;
+ retarget<FinalFn, cpu> finalfn;
+ mutable vec<T, width> value;
+};
- template <typename E1, typename T = value_type_of<E1>>
- KFR_SINTRIN T product(E1&& x)
- {
- static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return reduce(std::forward<E1>(x), fn_mul());
- }
+template <typename ReduceFn, typename TransformFn = fn_pass_through, typename FinalFn = fn_pass_through,
+ typename E1, typename T = value_type_of<E1>>
+KFR_SINTRIN T reduce(E1&& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn_pass_through(),
+ FinalFn&& finalfn = fn_pass_through())
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ const size_t size = e1.size();
+ using reducer_t = expression_reduce<T, decay<ReduceFn>, decay<TransformFn>, decay<FinalFn>>;
+ reducer_t red(std::forward<ReduceFn>(reducefn), std::forward<TransformFn>(transformfn),
+ std::forward<FinalFn>(finalfn));
+ process<T>(red, std::forward<E1>(e1), size);
- KFR_SPEC_FN(in_reduce, reduce)
- KFR_SPEC_FN(in_reduce, sum)
- KFR_SPEC_FN(in_reduce, dotproduct)
- KFR_SPEC_FN(in_reduce, rms)
- KFR_SPEC_FN(in_reduce, sumsqr)
- KFR_SPEC_FN(in_reduce, mean)
- KFR_SPEC_FN(in_reduce, min)
- KFR_SPEC_FN(in_reduce, max)
- KFR_SPEC_FN(in_reduce, product)
-};
+ return red.get();
}
-namespace native
-{
+KFR_FN(reduce)
+}
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_SINTRIN T sum(E1&& x)
{
static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return internal::in_reduce<>::sum(std::forward<E1>(x));
+ return internal::reduce(std::forward<E1>(x), fn_add());
}
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
@@ -227,32 +134,50 @@ KFR_SINTRIN T mean(E1&& x)
{
static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return internal::in_reduce<>::mean(std::forward<E1>(x));
+ return internal::reduce(std::forward<E1>(x), fn_add(), fn_pass_through(), fn_final_mean());
}
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T max(E1&& x)
+KFR_SINTRIN T minof(E1&& x)
{
static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return internal::in_reduce<>::max(std::forward<E1>(x));
+ return internal::reduce(std::forward<E1>(x), internal::fn_min());
}
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T min(E1&& x)
+KFR_SINTRIN T maxof(E1&& x)
{
static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return internal::in_reduce<>::min(std::forward<E1>(x));
+ return internal::reduce(std::forward<E1>(x), internal::fn_max());
+}
+
+template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_SINTRIN T absminof(E1&& x)
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return internal::reduce(std::forward<E1>(x), internal::fn_absmin());
+}
+
+template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_SINTRIN T absmaxof(E1&& x)
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return internal::reduce(std::forward<E1>(x), internal::fn_absmax());
}
template <typename E1, typename E2, typename T = value_type_of<E1>,
KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
KFR_SINTRIN T dotproduct(E1&& x, E2&& y)
{
- static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
- static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return internal::in_reduce<>::dotproduct(std::forward<E1>(x), std::forward<E2>(y));
+ auto m = std::forward<E1>(x) * std::forward<E2>(y);
+ using E12 = decltype(m); // type of the product x * y; the asserts below check it, not E1 alone
+ static_assert(!is_generic<E12>::value, "e1 * e2 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E12>::value, "e1 * e2 must be a sized expression (use typed<T>())");
+ return internal::reduce(std::move(m), fn_add());
}
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
@@ -260,7 +185,7 @@ KFR_SINTRIN T rms(E1&& x)
{
static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return internal::in_reduce<>::rms(std::forward<E1>(x));
+ return internal::reduce(std::forward<E1>(x), fn_add(), fn_sqr(), fn_final_rootmean());
}
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
@@ -268,7 +193,7 @@ KFR_SINTRIN T sumsqr(E1&& x)
{
static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return internal::in_reduce<>::sumsqr(std::forward<E1>(x));
+ return internal::reduce(std::forward<E1>(x), fn_add(), fn_sqr());
}
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
@@ -276,7 +201,6 @@ KFR_SINTRIN T product(E1&& x)
{
static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
- return internal::in_reduce<>::product(std::forward<E1>(x));
-}
+ return internal::reduce(std::forward<E1>(x), fn_mul());
}
}
diff --git a/include/kfr/math.hpp b/include/kfr/math.hpp
@@ -45,7 +45,3 @@
#include "base/sqrt.hpp"
#include "base/tan.hpp"
-namespace kfr
-{
-using namespace native;
-}
diff --git a/sources.cmake b/sources.cmake
@@ -20,6 +20,7 @@ set(
${PROJECT_SOURCE_DIR}/include/kfr/base/abs.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/asin_acos.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/atan.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/clamp.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/complex.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/constants.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/digitreverse.hpp
@@ -31,6 +32,7 @@ set(
${PROJECT_SOURCE_DIR}/include/kfr/base/logical.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/min_max.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/modzerobessel.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/operators.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/read_write.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/round.hpp
@@ -71,7 +73,6 @@ set(
${PROJECT_SOURCE_DIR}/include/kfr/expressions/basic.hpp
${PROJECT_SOURCE_DIR}/include/kfr/expressions/conversion.hpp
${PROJECT_SOURCE_DIR}/include/kfr/expressions/generators.hpp
- ${PROJECT_SOURCE_DIR}/include/kfr/expressions/operators.hpp
${PROJECT_SOURCE_DIR}/include/kfr/expressions/pointer.hpp
${PROJECT_SOURCE_DIR}/include/kfr/expressions/reduce.hpp
${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp
diff --git a/tests/basic_vector_test.cpp b/tests/basic_vector_test.cpp
@@ -11,142 +11,97 @@
#include <kfr/vec.hpp>
#include <kfr/version.hpp>
+#include "testo/testo.hpp"
+
using namespace kfr;
-using namespace kfr::native;
-template <typename T>
-void print_type(const T& value)
+TEST(test)
{
- println(type_name(value), ":");
- println(value);
-}
-
-int main(int /*argc*/, char** /*argv*/)
-{
- println(library_version());
- // >>> KFR ...
-
// How to make a vector:
// * Use constructor
const vec<double, 4> first{ 1, 2.5, -infinity, 3.1415926 };
- print_type(first);
- // >>> kfr::vec<double, 4>:
- // >>> 1 2.5 -inf 3.14159
+ CHECK(first == vec<double, 4>{ 1, 2.5, -infinity, 3.1415926 });
// * Use make_vector function
const auto second = make_vector(-1, +1);
- print_type(second);
- // >>> kfr::vec<int, 2>:
- // >>> -1 1
+ CHECK(second == vec<int, 2>{ -1, 1 });
// * Convert from vector of other type:
const vec<int, 4> int_vector{ 10, 20, 30, 40 };
const vec<double, 4> double_vector = cast<double>(int_vector);
- print_type(double_vector);
- // >>> kfr::vec<double, 4>:
- // >>> 10 20 30 40
+ CHECK(double_vector == vec<double, 4>{ 10, 20, 30, 40 });
// * Concat two vectors:
const vec<int, 1> left_part{ 1 };
const vec<int, 1> right_part{ 2 };
const vec<int, 2> pair{ left_part, right_part };
- print_type(pair);
- // >>> kfr::vec<int, 2>:
- // >>> 1 2
+ CHECK(pair == vec<int, 2>{ 1, 2 });
// * Same, but using make_vector and concat:
const vec<int, 2> pair2 = concat(make_vector(10), make_vector(20));
- print_type(pair2);
- // >>> kfr::vec<int, 2>:
- // >>> 10 20
+ CHECK(pair2 == vec<int, 2>{ 10, 20 });
// * Repeat vector multiple times:
const vec<short, 8> repeated = repeat<4>(make_vector<short>(0, -1));
- print_type(repeated);
- // >>> kfr::vec<short, 8>:
- // >>> 0 -1 0 -1 0 -1 0 -1
+ CHECK(repeated == vec<short, 8>{ 0, -1, 0, -1, 0, -1, 0, -1 });
// * Use enumerate to generate sequence of numbers:
const vec<int, 8> eight = enumerate<int, 8>();
- print_type(eight);
- // >>> kfr::vec<int, 8>:
- // >>> 0 1 2 3 4 5 6 7
+ CHECK(eight == vec<int, 8>{ 0, 1, 2, 3, 4, 5, 6, 7 });
// * Vectors can be of any length...
const vec<int, 1> one{ 42 };
const vec<int, 2> two = concat(one, make_vector(42));
- print_type(two);
- // >>> kfr::vec<int, 2>:
- // >>> 42 42
+ CHECK(two == vec<int, 2>{ 42, 42 });
const vec<u8, 256> very_long_vector = repeat<64>(make_vector<u8>(1, 2, 4, 8));
- print_type(slice<0, 17>(very_long_vector));
- // >>> kfr::vec<unsigned char, 17>:
- // >>> 1 2 4 8 1 2 4 8
- // >>> 1 2 4 8 1 2 4 8
- // >>> 1
+ CHECK(slice<0, 17>(very_long_vector) ==
+ vec<unsigned char, 17>{ 1, 2, 4, 8, 1, 2, 4, 8, 1, 2, 4, 8, 1, 2, 4, 8, 1 });
// * ...really any:
using big_vector = vec<i16, 107>;
big_vector v107 = enumerate<i16, 107>();
- print_type(hadd(v107));
- // >>> short:
- // >>> 5671
+ CHECK(hadd(v107) == static_cast<short>(5671));
using color = vec<u8, 3>;
const color green = cast<u8>(make_vector(0.0, 1.0, 0.0) * 255);
- print_type(green);
- // >>> kfr::vec<unsigned char, 3>:
- // >>> 0 255 0
+ CHECK(green == vec<unsigned char, 3>{ 0, 255, 0 });
// Vectors support all standard operators:
const auto op1 = make_vector(0, 1, 10, 100);
const auto op2 = make_vector(20, 2, -2, 200);
const auto result = op1 * op2 - 4;
- print_type(result);
- // >>> kfr::vec<int, 4>:
- // >>> -4 -2 -24 19996
+ CHECK(result == vec<int, 4>{ -4, -2, -24, 19996 });
// * Transform vector:
const vec<int, 8> numbers1 = enumerate<int, 8>();
const vec<int, 8> numbers2 = enumerate<int, 8>() + 100;
- print_type(odd(numbers1));
- print_type(even(numbers2));
- // >>> kfr::vec<int, 4>:
- // >>> 1 3 5 7
- // >>> kfr::vec<int, 4>:
- // >>> 100 102 104 106
+ CHECK(odd(numbers1) == vec<int, 4>{ 1, 3, 5, 7 });
+ CHECK(even(numbers2) == vec<int, 4>{ 100, 102, 104, 106 });
// * The following command pairs are equivalent:
- print_type(permute<0, 2, 1, 3, 4, 6, 5, 7>(numbers1));
- print_type(permute<0, 2, 1, 3>(numbers1));
- // >>> kfr::vec<int, 8>:
- // >>> 0 2 1 3 4 6 5 7
- // >>> kfr::vec<int, 8>:
- // >>> 0 2 1 3 4 6 5 7
-
- print_type(shuffle<0, 8, 2, 10, 4, 12, 6, 14>(numbers1, numbers2));
- print_type(shuffle<0, 8>(numbers1, numbers2));
- // >>> kfr::vec<int, 8>:
- // >>> 0 100 2 102 4 104 6 106
- // >>> kfr::vec<int, 8>:
- // >>> 0 100 2 102 4 104 6 106
-
- print_type(blend<0, 1, 1, 0, 1, 1, 0, 1>(numbers1, numbers2));
- print_type(blend<0, 1, 1>(numbers1, numbers2));
- // >>> kfr::vec<int, 8>:
- // >>> 0 101 102 3 104 105 6 107
- // >>> kfr::vec<int, 8>:
- // >>> 0 101 102 3 104 105 6 107
+ CHECK(permute(numbers1, elements<0, 2, 1, 3, 4, 6, 5, 7>) == vec<int, 8>{ 0, 2, 1, 3, 4, 6, 5, 7 });
+ CHECK(permute(numbers1, elements<0, 2, 1, 3>) == vec<int, 8>{ 0, 2, 1, 3, 4, 6, 5, 7 });
+
+ CHECK(shuffle(numbers1, numbers2, elements<0, 8, 2, 10, 4, 12, 6, 14>) ==
+ vec<int, 8>{ 0, 100, 2, 102, 4, 104, 6, 106 });
+ CHECK(shuffle(numbers1, numbers2, elements<0, 8>) == vec<int, 8>{ 0, 100, 2, 102, 4, 104, 6, 106 });
+
+ CHECK(blend(numbers1, numbers2, elements<0, 1, 1, 0, 1, 1, 0, 1>) ==
+ vec<int, 8>{ 0, 101, 102, 3, 104, 105, 6, 107 });
+ CHECK(blend(numbers1, numbers2, elements<0, 1, 1>) ==
+ vec<int, 8>{ 0, 101, 102, 3, 104, 105, 6, 107 });
// * Transpose matrix:
const auto sixteen = enumerate<float, 16>();
- print_type(transpose<4>(sixteen));
- // >>> kfr::vec<float, 16>:
- // >>> 0 4 8 12 1 5 9 13
- // >>> 2 6 10 14 3 7 11 15
- // >>>
+ CHECK(transpose<4>(sixteen) ==
+ vec<float, 16>{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 });
+}
+
+int main(int /*argc*/, char** /*argv*/)
+{
+ println(library_version());
- return 0;
+ return testo::run_all("", true);
}
diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp
@@ -10,7 +10,6 @@
#include <kfr/base/complex.hpp>
#include <kfr/cometa/string.hpp>
#include <kfr/expressions/basic.hpp>
-#include <kfr/expressions/operators.hpp>
#include <kfr/expressions/reduce.hpp>
#include <kfr/math.hpp>
#include <kfr/version.hpp>
@@ -23,6 +22,7 @@ void assert_is_same()
static_assert(std::is_same<T1, T2>::value, "");
}
+
TEST(complex_vector)
{
const vec<c32, 1> c32x1{ c32{ 0, 1 } };
@@ -179,6 +179,9 @@ int main(int argc, char** argv)
static_assert(vector_width<i32, cpu_t::sse2> == 4, "");
static_assert(vector_width<complex<i32>, cpu_t::sse2> == 2, "");
+ static_assert(is_numeric<vec<complex<float>, 4>>::value, "");
+ static_assert(is_numeric_args<vec<complex<float>, 4>>::value, "");
+
static_assert(sizeof(vec<c32, 4>) == sizeof(vec<f32, 8>), "");
static_assert(vec<f32, 4>::size() == 4, "");
static_assert(vec<c32, 4>::size() == 4, "");
diff --git a/tests/conv_test.cpp b/tests/conv_test.cpp
@@ -23,7 +23,7 @@ TEST(test_convolve)
univector<double, 5> b({ 0.25, 0.5, 1.0, 0.5, 0.25 });
univector<double> c = convolve(a, b);
CHECK(c.size() == 9);
- CHECK(native::rms(c - univector<double>({ 0.25, 1., 2.75, 5., 7.5, 8.5, 7.75, 3.5, 1.25 })) < 0.0001);
+ CHECK(rms(c - univector<double>({ 0.25, 1., 2.75, 5., 7.5, 8.5, 7.75, 3.5, 1.25 })) < 0.0001);
}
int main(int argc, char** argv)
diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp
@@ -14,7 +14,6 @@
#include <kfr/dft/fft.hpp>
#include <kfr/dft/reference_dft.hpp>
#include <kfr/expressions/basic.hpp>
-#include <kfr/expressions/operators.hpp>
#include <kfr/expressions/reduce.hpp>
#include <kfr/io/tostring.hpp>
#include <kfr/math.hpp>
diff --git a/tests/empty_test.cpp b/tests/empty_test.cpp
@@ -1,7 +1,5 @@
#include <kfr/math.hpp>
-#include <kfr/vec.hpp>
using namespace kfr;
-using namespace kfr::native;
int main(int argc, char** argv) { return 0; }
diff --git a/tests/fracdelay_test.cpp b/tests/fracdelay_test.cpp
@@ -20,17 +20,17 @@ using namespace kfr;
TEST(test_fracdelay)
{
univector<double, 5> a({ 1, 2, 3, 4, 5 });
- univector<double, 5> b = native::fracdelay(a, 0.5);
- CHECK(native::rms(b - univector<double>({ 0.5, 1.5, 2.5, 3.5, 4.5 })) < c_epsilon<double> * 5);
+ univector<double, 5> b = fracdelay(a, 0.5);
+ CHECK(rms(b - univector<double>({ 0.5, 1.5, 2.5, 3.5, 4.5 })) < c_epsilon<double> * 5);
- b = native::fracdelay(a, 0.1);
- CHECK(native::rms(b - univector<double>({ 0.9, 1.9, 2.9, 3.9, 4.9 })) < c_epsilon<double> * 5);
+ b = fracdelay(a, 0.1);
+ CHECK(rms(b - univector<double>({ 0.9, 1.9, 2.9, 3.9, 4.9 })) < c_epsilon<double> * 5);
- b = native::fracdelay(a, 0.0);
- CHECK(native::rms(b - univector<double>({ 1, 2, 3, 4, 5 })) < c_epsilon<double> * 5);
+ b = fracdelay(a, 0.0);
+ CHECK(rms(b - univector<double>({ 1, 2, 3, 4, 5 })) < c_epsilon<double> * 5);
- b = native::fracdelay(a, 1.0);
- CHECK(native::rms(b - univector<double>({ 0, 1, 2, 3, 4 })) < c_epsilon<double> * 5);
+ b = fracdelay(a, 1.0);
+ CHECK(rms(b - univector<double>({ 0, 1, 2, 3, 4 })) < c_epsilon<double> * 5);
}
int main(int argc, char** argv)
diff --git a/tests/stat_test.cpp b/tests/stat_test.cpp
@@ -20,32 +20,32 @@ TEST(test_stat)
{
{
univector<float, 5> a({ 1, 2, 3, 4, 5 });
- CHECK(native::sum(a) == 15);
- CHECK(native::mean(a) == 3);
- CHECK(native::min(a) == 1);
- CHECK(native::max(a) == 5);
- CHECK(native::sumsqr(a) == 55);
- CHECK(native::rms(a) == 3.316624790355399849115f);
- CHECK(native::product(a) == 120);
+ CHECK(sum(a) == 15);
+ CHECK(mean(a) == 3);
+ CHECK(minof(a) == 1);
+ CHECK(maxof(a) == 5);
+ CHECK(sumsqr(a) == 55);
+ CHECK(rms(a) == 3.316624790355399849115f);
+ CHECK(product(a) == 120);
}
{
univector<double, 5> a({ 1, 2, 3, 4, 5 });
- CHECK(native::sum(a) == 15);
- CHECK(native::mean(a) == 3);
- CHECK(native::min(a) == 1);
- CHECK(native::max(a) == 5);
- CHECK(native::sumsqr(a) == 55);
- CHECK(native::rms(a) == 3.316624790355399849115);
- CHECK(native::product(a) == 120);
+ CHECK(sum(a) == 15);
+ CHECK(mean(a) == 3);
+ CHECK(minof(a) == 1);
+ CHECK(maxof(a) == 5);
+ CHECK(sumsqr(a) == 55);
+ CHECK(rms(a) == 3.316624790355399849115);
+ CHECK(product(a) == 120);
}
{
univector<int, 5> a({ 1, 2, 3, 4, 5 });
- CHECK(native::sum(a) == 15);
- CHECK(native::mean(a) == 3);
- CHECK(native::min(a) == 1);
- CHECK(native::max(a) == 5);
- CHECK(native::sumsqr(a) == 55);
- CHECK(native::product(a) == 120);
+ CHECK(sum(a) == 15);
+ CHECK(mean(a) == 3);
+ CHECK(minof(a) == 1);
+ CHECK(maxof(a) == 5);
+ CHECK(sumsqr(a) == 55);
+ CHECK(product(a) == 120);
}
}