commit 85000e10217241e44402e582ffe366e1a8932389
parent c9da5fa2939fce901f20629aececfc967673e17c
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date: Tue, 11 Oct 2016 03:29:41 +0300
Constants refactoring, macros for pragma, MSVC support
Diffstat:
73 files changed, 1887 insertions(+), 1362 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -56,6 +56,9 @@ set(CMAKE_C_FLAGS "${OPT_TARGET} ${OPT_BITNESS} ${OPT_STATIC}" CACHE STRING "com
project(kfr)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin/Debug)
+
include(sources.cmake)
set(ALL_WARNINGS -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c99-extensions -Wno-padded)
diff --git a/examples/biquads.cpp b/examples/biquads.cpp
@@ -10,7 +10,7 @@
using namespace kfr;
-int main(int argc, char** argv)
+int main()
{
println(library_version());
diff --git a/examples/dft.cpp b/examples/dft.cpp
@@ -11,7 +11,7 @@
using namespace kfr;
-int main(int argc, char** argv)
+int main()
{
println(library_version());
@@ -45,7 +45,5 @@ int main(int argc, char** argv)
println(in);
println();
println(dB);
- (void)argc;
- (void)argv;
return 0;
}
diff --git a/examples/fir.cpp b/examples/fir.cpp
@@ -10,7 +10,7 @@
using namespace kfr;
-int main(int argc, char** argv)
+int main()
{
println(library_version());
diff --git a/examples/sample_rate_conversion.cpp b/examples/sample_rate_conversion.cpp
@@ -15,7 +15,7 @@ constexpr size_t output_sr = 44100;
constexpr size_t len = 96000 * 6;
constexpr fbase i32max = 2147483647.0;
-int main(int argc, char** argv)
+int main()
{
println(library_version());
diff --git a/examples/window.cpp b/examples/window.cpp
@@ -10,7 +10,7 @@
using namespace kfr;
-int main(int argc, char** argv)
+int main()
{
println(library_version());
diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp
@@ -41,7 +41,7 @@ namespace intrinsics
template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
KFR_SINTRIN vec<T, N> abs(const vec<T, N>& x)
{
- return x & internal::invhighbitmask<T>;
+ return x & constants<T>::invhighbitmask();
}
KFR_SINTRIN i64sse abs(const i64sse& x) { return select(x >= 0, x, -x); }
@@ -86,7 +86,7 @@ KFR_SINTRIN f32neon abs(const f32neon& x) { return vabsq_f32(*x); }
#if defined CMT_ARCH_NEON64
KFR_SINTRIN f64neon abs(const f64neon& x) { return vabsq_f64(*x); }
#else
-KFR_SINTRIN f64neon abs(const f64neon& x) { return x & internal::invhighbitmask<f64>; }
+KFR_SINTRIN f64neon abs(const f64neon& x) { return x & constants<f64>::invhighbitmask(); }
#endif
KFR_HANDLE_ALL_SIZES_1(abs)
@@ -97,14 +97,14 @@ KFR_HANDLE_ALL_SIZES_1(abs)
template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
KFR_SINTRIN vec<T, N> abs(const vec<T, N>& x)
{
- return x & internal::invhighbitmask<T>;
+ return x & constants<T>::invhighbitmask();
}
// fallback
template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
KFR_SINTRIN vec<T, N> abs(const vec<T, N>& x)
{
- return select(x >= T(), x, -x);
+ return select(x >= T(0), x, -x);
}
#endif
KFR_I_CONVERTER(abs)
diff --git a/include/kfr/base/atan.hpp b/include/kfr/base/atan.hpp
@@ -36,8 +36,9 @@ namespace kfr
namespace intrinsics
{
template <size_t N>
-KFR_SINTRIN vec<f32, N> atan2k(vec<f32, N> y, vec<f32, N> x)
+KFR_SINTRIN vec<f32, N> atan2k(const vec<f32, N>& yy, const vec<f32, N>& xx)
{
+ vec<f32, N> x = xx, y = yy;
vec<f32, N> s, t, u;
vec<i32, N> q;
q = select(x < 0, -2, 0);
@@ -64,8 +65,9 @@ KFR_SINTRIN vec<f32, N> atan2k(vec<f32, N> y, vec<f32, N> x)
}
template <size_t N>
-KFR_SINTRIN vec<f64, N> atan2k(vec<f64, N> y, vec<f64, N> x)
+KFR_SINTRIN vec<f64, N> atan2k(const vec<f64, N>& yy, const vec<f64, N>& xx)
{
+ vec<f64, N> x = xx, y = yy;
vec<f64, N> s, t, u;
vec<i64, N> q;
q = select(x < 0, i64(-2), i64(0));
diff --git a/include/kfr/base/basic_expressions.hpp b/include/kfr/base/basic_expressions.hpp
@@ -276,7 +276,7 @@ struct expression_linspace<T, true> : input_expression
return mix((enumerate(x) + cast<T>(cast<TI>(index))) * invsize, cast<T>(start), cast<T>(stop));
}
template <typename U, size_t N>
- CMT_INLINE static vec<U, N> mix(vec<U, N> t, U x, U y)
+ CMT_INLINE static vec<U, N> mix(const vec<U, N>& t, U x, U y)
{
return (U(1.0) - t) * x + t * y;
}
@@ -296,12 +296,12 @@ public:
using T = value_type;
template <typename... Expr_>
- CMT_INLINE expression_sequence(const size_t (&segments)[base::size], Expr_&&... expr) noexcept
+ CMT_INLINE expression_sequence(const size_t (&segments)[base::count], Expr_&&... expr) noexcept
: base(std::forward<Expr_>(expr)...)
{
std::copy(std::begin(segments), std::end(segments), this->segments.begin() + 1);
- this->segments[0] = 0;
- this->segments[base::size + 1] = size_t(-1);
+ this->segments[0] = 0;
+ this->segments[base::count + 1] = size_t(-1);
}
template <size_t N>
@@ -314,7 +314,7 @@ public:
else
{
vec<T, N> result;
-#pragma clang loop unroll_count(4)
+ CMT_PRAGMA_CLANG(clang loop unroll_count(4))
for (size_t i = 0; i < N; i++)
{
sindex = segments[sindex + 1] == index ? sindex + 1 : sindex;
@@ -329,12 +329,12 @@ protected:
template <size_t N>
CMT_NOINLINE vec<T, N> get(cinput_t cinput, size_t index, size_t expr_index, vec_t<T, N> y)
{
- return cswitch(indicesfor<E...>, expr_index,
+ return cswitch(indicesfor_t<E...>(), expr_index,
[&](auto val) { return this->argument(cinput, val, index, y); },
[&]() { return zerovector(y); });
}
- std::array<size_t, base::size + 2> segments;
+ std::array<size_t, base::count + 2> segments;
};
template <typename Fn, typename E>
@@ -475,7 +475,7 @@ struct multioutput : output_expression
template <typename T, size_t N>
void operator()(coutput_t coutput, size_t index, const vec<T, N>& x)
{
- cfor(csize<0>, csize<sizeof...(E)>,
+ cfor(csize_t<0>(), csize_t<sizeof...(E)>(),
[&](auto n) { std::get<val_of(decltype(n)())>(outputs)(coutput, index, x); });
}
std::tuple<E...> outputs;
@@ -517,7 +517,7 @@ struct expression_unpack : private expression<E...>, output_expression
template <typename U, size_t N>
CMT_INLINE void operator()(coutput_t coutput, size_t index, const vec<vec<U, count>, N>& x)
{
- output(coutput, index, x, csizeseq<count>);
+ output(coutput, index, x, csizeseq_t<count>());
}
template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value)>
@@ -576,9 +576,9 @@ task_partition<OutExpr, InExpr> partition(OutExpr&& output, InExpr&& input, size
{
static_assert(!is_infinite<OutExpr>::value || !is_infinite<InExpr>::value, "");
- minimum_size = minimum_size == 0 ? vector_width<T> * 8 : minimum_size;
+ minimum_size = minimum_size == 0 ? platform<T>::vector_width * 8 : minimum_size;
const size_t size = size_min(output.size(), input.size());
- const size_t chunk_size = align_up(std::max(size / count, minimum_size), vector_width<T>);
+ const size_t chunk_size = align_up(std::max(size / count, minimum_size), platform<T>::vector_width);
task_partition<OutExpr, InExpr> result(std::forward<OutExpr>(output), std::forward<InExpr>(input), size,
chunk_size, (size + chunk_size - 1) / chunk_size);
diff --git a/include/kfr/base/complex.hpp b/include/kfr/base/complex.hpp
@@ -40,6 +40,9 @@
#include <complex>
#endif
+CMT_PRAGMA_MSVC(warning(push))
+CMT_PRAGMA_MSVC(warning(disable : 4814))
+
namespace kfr
{
#ifdef KFR_STD_COMPLEX
@@ -72,8 +75,13 @@ struct complex
constexpr complex(complex<U>&& other) noexcept : re(std::move(other.re)), im(std::move(other.im))
{
}
+#ifdef CMT_COMPILER_GNU
constexpr complex& operator=(const complex&) noexcept = default;
constexpr complex& operator=(complex&&) noexcept = default;
+#else
+ complex& operator=(const complex&) = default;
+ complex& operator=(complex&&) = default;
+#endif
constexpr const T& real() const noexcept { return re; }
constexpr const T& imag() const noexcept { return im; }
constexpr void real(T value) noexcept { re = value; }
@@ -158,7 +166,7 @@ struct compound_type_traits<kfr::complex<T>>
template <typename U>
using rebind = kfr::complex<U>;
template <typename U>
- using deep_rebind = kfr::complex<cometa::deep_rebind<subtype, U>>;
+ using deep_rebind = kfr::complex<typename compound_type_traits<subtype>::template deep_rebind<U>>;
static constexpr subtype at(const kfr::complex<T>& value, size_t index)
{
@@ -189,15 +197,15 @@ struct vec_op<complex<T>, N> : private vec_op<T, N * 2>
constexpr static size_t w = N * 2;
- CMT_INLINE constexpr static simd<scalar_type, w> mul(const simd<scalar_type, w>& x,
- const simd<scalar_type, w>& y) noexcept
+ CMT_INLINE static simd<scalar_type, w> mul(const simd<scalar_type, w>& x,
+ const simd<scalar_type, w>& y) noexcept
{
const vec<scalar_type, w> xx = x;
const vec<scalar_type, w> yy = y;
return *subadd(xx * dupeven(yy), swap<2>(xx) * dupodd(yy));
}
- CMT_INLINE constexpr static simd<scalar_type, w> div(const simd<scalar_type, w>& x,
- const simd<scalar_type, w>& y) noexcept
+ CMT_INLINE static simd<scalar_type, w> div(const simd<scalar_type, w>& x,
+ const simd<scalar_type, w>& y) noexcept
{
const vec<scalar_type, w> xx = x;
const vec<scalar_type, w> yy = y;
@@ -670,4 +678,26 @@ struct common_type<T1, kfr::complex<T2>>
{
using type = kfr::complex<typename common_type<T1, T2>::type>;
};
+template <typename T1, typename T2, size_t N> // complex<T1> with vec<complex<T2>, N>
+struct common_type<kfr::complex<T1>, kfr::vec<kfr::complex<T2>, N>>
+{
+ using type = kfr::vec<kfr::complex<typename common_type<T1, T2>::type>, N>; // N-wide vector of complex over the common type of T1 and T2
+};
+template <typename T1, typename T2, size_t N> // vec<complex<T1>, N> with complex<T2> (mirror of the case above)
+struct common_type<kfr::vec<kfr::complex<T1>, N>, kfr::complex<T2>>
+{
+ using type = kfr::vec<kfr::complex<typename common_type<T1, T2>::type>, N>; // same result type as the mirrored case
+};
+template <typename T1, typename T2, size_t N> // complex<T1> with vec<T2, N>
+struct common_type<kfr::complex<T1>, kfr::vec<T2, N>>
+{
+ using type = kfr::vec<kfr::complex<typename common_type<T1, T2>::type>, N>; // the vector's element type becomes complex
+};
+template <typename T1, typename T2, size_t N> // vec<T1, N> with complex<T2> (mirror of the case above)
+struct common_type<kfr::vec<T1, N>, kfr::complex<T2>>
+{
+ using type = kfr::vec<kfr::complex<typename common_type<T1, T2>::type>, N>; // same result type as the mirrored case
+};
}
+
+CMT_PRAGMA_MSVC(warning(pop))
diff --git a/include/kfr/base/constants.hpp b/include/kfr/base/constants.hpp
@@ -31,66 +31,256 @@
namespace kfr
{
+#if CMT_COMPILER_GNU
+constexpr double infinity = __builtin_inf();
+constexpr double qnan = __builtin_nan("");
+#else
+constexpr double infinity = HUGE_VAL;
+constexpr double qnan = NAN;
+#endif
+
+/// Numeric constants for the scalar type underlying T (`subtype<T>`).
+///
+/// Plain values are `constexpr static` data members. The bit-pattern masks
+/// (allones / highbitmask / invhighbitmask) are static functions: constexpr when
+/// CMT_COMPILER_GNU is set (NaN payloads come from compiler builtins), ordinary
+/// functions otherwise (the floating-point patterns are produced with bitcast).
+template <typename T>
+struct constants
+{
+public:
+    using Tsub = subtype<T>;
+
+    /// pi * m / d
+    constexpr static Tsub pi_s(int m, int d = 1) { return pi * m / d; }
+    /// (1 / pi) * m / d
+    constexpr static Tsub recip_pi_s(int m, int d = 1) { return recip_pi * m / d; }
+
+    constexpr static Tsub pi = static_cast<Tsub>(3.1415926535897932384626433832795);
+    constexpr static Tsub sqr_pi = static_cast<Tsub>(9.8696044010893586188344909998762);
+    constexpr static Tsub recip_pi = static_cast<Tsub>(0.31830988618379067153776752674503);
+    // degrees -> radians factor: pi / 180
+    constexpr static Tsub degtorad = static_cast<Tsub>(pi / 180);
+    // radians -> degrees factor: 180 / pi. The previous expression, pi * 180,
+    // was not the inverse of degtorad; this matches c_radtodeg (c_recip_pi<T, 180>).
+    constexpr static Tsub radtodeg = static_cast<Tsub>(recip_pi * 180);
+    constexpr static Tsub e = static_cast<Tsub>(2.718281828459045235360287471352662);
+    constexpr static Tsub recip_log_2 = static_cast<Tsub>(1.442695040888963407359924681001892137426645954);
+    constexpr static Tsub recip_log_10 = static_cast<Tsub>(0.43429448190325182765112891891661);
+    constexpr static Tsub log_2 = static_cast<Tsub>(0.69314718055994530941723212145818);
+    constexpr static Tsub log_10 = static_cast<Tsub>(2.3025850929940456840179914546844);
+    constexpr static Tsub sqrt_2 = static_cast<Tsub>(1.4142135623730950488016887242097);
+
+    // fold_constant_*: each entry supplies an f32 and an f64 literal, selected by
+    // choose_const<Tsub>. fold_constant_div and fold_constant_hi are both ~0.785398
+    // (pi / 4); rem1 and rem2 are much smaller corrections.
+    // NOTE(review): presumably hi + rem1 + rem2 is a multi-part split of pi / 4 for
+    // argument reduction - confirm against the trigonometric code that uses them.
+    constexpr static Tsub fold_constant_div = choose_const<Tsub>(
+        CMT_FP(0x1.921fb6p-1f, 7.8539818525e-01f), CMT_FP(0x1.921fb54442d18p-1, 7.853981633974482790e-01));
+
+    constexpr static Tsub fold_constant_hi = choose_const<Tsub>(
+        CMT_FP(0x1.922000p-1f, 7.8540039062e-01f), CMT_FP(0x1.921fb40000000p-1, 7.853981256484985352e-01));
+    constexpr static Tsub fold_constant_rem1 =
+        choose_const<Tsub>(CMT_FP(-0x1.2ae000p-19f, -2.2267922759e-06f),
+                           CMT_FP(0x1.4442d00000000p-25, 3.774894707930798177e-08));
+    constexpr static Tsub fold_constant_rem2 =
+        choose_const<Tsub>(CMT_FP(-0x1.de973ep-32f, -4.3527578764e-10f),
+                           CMT_FP(0x1.8469898cc5170p-49, 2.695151429079059484e-15));
+
+    constexpr static Tsub epsilon = std::numeric_limits<Tsub>::epsilon();
+    constexpr static Tsub infinity = std::numeric_limits<Tsub>::infinity();
+    constexpr static Tsub neginfinity = -std::numeric_limits<Tsub>::infinity();
+    constexpr static Tsub qnan = std::numeric_limits<Tsub>::quiet_NaN();
+
+    // The mask functions below use plain `if` on is_same<...>::value, so all three
+    // return statements are compiled for every Tsub; only the matching one executes.
+
+#if CMT_COMPILER_GNU
+
+    /// Value intended to have every bit set: a negated NaN for f32/f64, -1 for other types.
+    /// NOTE(review): relies on the builtin honouring the payload string - confirm per compiler.
+    constexpr static Tsub allones()
+    {
+        if (is_same<Tsub, f32>::value)
+        {
+            return -__builtin_nanf("0xFFFFFFFF");
+        }
+        else if (is_same<Tsub, f64>::value)
+        {
+            return -__builtin_nan("0xFFFFFFFFFFFFFFFF");
+        }
+        else
+        {
+            return static_cast<Tsub>(-1ll);
+        }
+    }
+
+    /// Value with no bits set.
+    constexpr static Tsub allzeros() { return Tsub(0); }
+
+    /// Only the most significant bit set: negative zero for f32/f64,
+    /// 1 shifted to the top bit of Tsub for other types.
+    constexpr static Tsub highbitmask()
+    {
+        if (is_same<Tsub, f32>::value)
+        {
+            return -0.0f;
+        }
+        else if (is_same<Tsub, f64>::value)
+        {
+            return -0.0;
+        }
+        else
+        {
+            return static_cast<Tsub>(1ull << (sizeof(Tsub) * 8 - 1));
+        }
+    }
+
+    /// Complement of highbitmask(): a positive NaN with full payload for f32/f64,
+    /// every bit below the top one for other types. abs() masks with this value.
+    constexpr static Tsub invhighbitmask()
+    {
+        if (is_same<Tsub, f32>::value)
+        {
+            return __builtin_nanf("0xFFFFFFFF");
+        }
+        else if (is_same<Tsub, f64>::value)
+        {
+            return __builtin_nan("0xFFFFFFFFFFFFFFFF");
+        }
+        else
+        {
+            return static_cast<Tsub>((1ull << (sizeof(Tsub) * 8 - 1)) - 1);
+        }
+    }
+#else
+
+    /// Every bit set: bit pattern 0xFF..FF reinterpreted as f32/f64, -1 for other types.
+    static Tsub allones()
+    {
+        if (is_same<Tsub, f32>::value)
+        {
+            return static_cast<Tsub>(bitcast<f32>(0xFFFFFFFFu));
+        }
+        else if (is_same<Tsub, f64>::value)
+        {
+            return static_cast<Tsub>(bitcast<f64>(0xFFFFFFFFFFFFFFFFull));
+        }
+        else
+        {
+            return static_cast<Tsub>(-1ll);
+        }
+    }
+
+    /// Value with no bits set.
+    constexpr static Tsub allzeros() { return Tsub(0); }
+
+    /// Only the most significant bit set: negative zero for f32/f64,
+    /// 1 shifted to the top bit of Tsub for other types.
+    static Tsub highbitmask()
+    {
+        if (is_same<Tsub, f32>::value)
+        {
+            return static_cast<Tsub>(-0.0f);
+        }
+        else if (is_same<Tsub, f64>::value)
+        {
+            return static_cast<Tsub>(-0.0);
+        }
+        else
+        {
+            return static_cast<Tsub>(1ull << (sizeof(Tsub) * 8 - 1));
+        }
+    }
+
+    /// Every bit except the most significant one: 0x7F..FF reinterpreted as f32/f64,
+    /// the same integer pattern for other types. abs() masks with this value.
+    static Tsub invhighbitmask()
+    {
+        if (is_same<Tsub, f32>::value)
+        {
+            return static_cast<Tsub>(bitcast<f32>(0x7FFFFFFFu));
+        }
+        else if (is_same<Tsub, f64>::value)
+        {
+            return static_cast<Tsub>(bitcast<f64>(0x7FFFFFFFFFFFFFFFull));
+        }
+        else
+        {
+            return static_cast<Tsub>((1ull << (sizeof(Tsub) * 8 - 1)) - 1);
+        }
+    }
+#endif
+};
+
+template <typename T>
+constexpr subtype<T> constants<T>::pi;
+template <typename T>
+constexpr subtype<T> constants<T>::sqr_pi;
+template <typename T>
+constexpr subtype<T> constants<T>::recip_pi;
+template <typename T>
+constexpr subtype<T> constants<T>::degtorad;
+template <typename T>
+constexpr subtype<T> constants<T>::radtodeg;
+template <typename T>
+constexpr subtype<T> constants<T>::e;
+template <typename T>
+constexpr subtype<T> constants<T>::recip_log_2;
+template <typename T>
+constexpr subtype<T> constants<T>::recip_log_10;
+template <typename T>
+constexpr subtype<T> constants<T>::log_2;
+template <typename T>
+constexpr subtype<T> constants<T>::log_10;
+template <typename T>
+constexpr subtype<T> constants<T>::sqrt_2;
+template <typename T>
+constexpr subtype<T> constants<T>::fold_constant_div;
+template <typename T>
+constexpr subtype<T> constants<T>::fold_constant_hi;
+template <typename T>
+constexpr subtype<T> constants<T>::fold_constant_rem1;
+template <typename T>
+constexpr subtype<T> constants<T>::fold_constant_rem2;
+template <typename T>
+constexpr subtype<T> constants<T>::epsilon;
+template <typename T>
+constexpr subtype<T> constants<T>::infinity;
+template <typename T>
+constexpr subtype<T> constants<T>::neginfinity;
+template <typename T>
+constexpr subtype<T> constants<T>::qnan;
+
// π (pi)
// c_pi<f64, 4> = 4pi
// c_pi<f64, 3, 4> = 3/4pi
-template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
-constexpr Tsub c_pi = Tsub(3.1415926535897932384626433832795 * m / d);
+template <typename T, int m = 1, int d = 1>
+constexpr subtype<T> c_pi = subtype<T>(3.1415926535897932384626433832795 * m / d);
// π² (pi²)
// c_sqr_pi<f64, 4> = 4pi²
// c_sqr_pi<f64, 3, 4> = 3/4pi²
-template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
-constexpr Tsub c_sqr_pi = Tsub(9.8696044010893586188344909998762 * m / d);
+template <typename T, int m = 1, int d = 1>
+constexpr subtype<T> c_sqr_pi = subtype<T>(9.8696044010893586188344909998762 * m / d);
// 1/π (1/pi)
// c_recip_pi<f64> 1/pi
// c_recip_pi<f64, 4> 4/pi
-template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
-constexpr Tsub c_recip_pi = Tsub(0.31830988618379067153776752674503 * m / d);
+template <typename T, int m = 1, int d = 1>
+constexpr subtype<T> c_recip_pi = subtype<T>(0.31830988618379067153776752674503 * m / d);
// degree to radian conversion factor
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_degtorad = c_pi<T, 1, 180>;
+template <typename T>
+constexpr subtype<T> c_degtorad = c_pi<T, 1, 180>;
// radian to degree conversion factor
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_radtodeg = c_recip_pi<T, 180>;
+template <typename T>
+constexpr subtype<T> c_radtodeg = c_recip_pi<T, 180>;
// e, Euler's number
-template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
-constexpr Tsub c_e = Tsub(2.718281828459045235360287471352662 * m / d);
+template <typename T, int m = 1, int d = 1>
+constexpr subtype<T> c_e = subtype<T>(2.718281828459045235360287471352662 * m / d);
-template <typename T, typename Tsub = subtype<T>>
-constexpr unsigned c_mantissa_bits = sizeof(Tsub) == 32 ? 23 : 52;
+// Number of stored mantissa bits: 23 when the scalar type is 4 bytes wide, 52 otherwise.
+// sizeof yields bytes, so the 32-bit case is `== 4`; the former `== 32` never matched
+// and every type was reported as having 52 bits.
+template <typename T>
+constexpr unsigned c_mantissa_bits = sizeof(subtype<T>) == 4 ? 23 : 52;
-template <typename T, typename Tsub = usubtype<T>>
-constexpr Tsub c_mantissa_mask = (Tsub(1) << c_mantissa_bits<T>)-1;
+// Mask with the low c_mantissa_bits<T> bits set. It is held in usubtype<T>, as it was
+// before the default template argument was dropped: `<<` is not defined for
+// floating-point operands, so subtype<T> cannot be used here.
+template <typename T>
+constexpr usubtype<T> c_mantissa_mask = (usubtype<T>(1) << c_mantissa_bits<T>)-1;
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_epsilon = (std::numeric_limits<Tsub>::epsilon());
+template <typename T>
+constexpr subtype<T> c_epsilon = (std::numeric_limits<subtype<T>>::epsilon());
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_infinity = std::numeric_limits<Tsub>::infinity();
+template <typename T>
+constexpr subtype<T> c_infinity = std::numeric_limits<subtype<T>>::infinity();
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_neginfinity = -std::numeric_limits<Tsub>::infinity();
+template <typename T>
+constexpr subtype<T> c_neginfinity = -std::numeric_limits<subtype<T>>::infinity();
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_qnan = std::numeric_limits<Tsub>::quiet_NaN();
+template <typename T>
+constexpr subtype<T> c_qnan = std::numeric_limits<subtype<T>>::quiet_NaN();
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_recip_log_2 = Tsub(1.442695040888963407359924681001892137426645954);
+template <typename T>
+constexpr subtype<T> c_recip_log_2 = subtype<T>(1.442695040888963407359924681001892137426645954);
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_recip_log_10 = Tsub(0.43429448190325182765112891891661);
+template <typename T>
+constexpr subtype<T> c_recip_log_10 = subtype<T>(0.43429448190325182765112891891661);
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_log_2 = Tsub(0.69314718055994530941723212145818);
+template <typename T>
+constexpr subtype<T> c_log_2 = subtype<T>(0.69314718055994530941723212145818);
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub c_log_10 = Tsub(2.3025850929940456840179914546844);
+template <typename T>
+constexpr subtype<T> c_log_10 = subtype<T>(2.3025850929940456840179914546844);
-template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
-constexpr Tsub c_sqrt_2 = Tsub(1.4142135623730950488016887242097 * m / d);
+template <typename T, int m = 1, int d = 1>
+constexpr subtype<T> c_sqrt_2 = subtype<T>(1.4142135623730950488016887242097 * m / d);
}
diff --git a/include/kfr/base/cpuid.hpp b/include/kfr/base/cpuid.hpp
@@ -25,6 +25,10 @@
*/
#pragma once
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
#include "types.hpp"
#include <cstring>
@@ -121,9 +125,21 @@ CMT_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0)
CMT_INLINE u32 get_xcr0()
{
u32 xcr0;
- __asm__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx");
+ __asm__ __volatile__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx");
return xcr0;
}
+#elif defined CMT_COMPILER_MSVC
+
+// MSVC variant: forwards the query to the __cpuidex intrinsic, which writes through ptr.
+CMT_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0)
+{
+    __cpuidex(reinterpret_cast<int*>(ptr), static_cast<int>(func), static_cast<int>(subfunc));
+}
+// MSVC variant: low 32 bits of _xgetbv(_XCR_XFEATURE_ENABLED_MASK).
+// Returns 0 when the toolchain headers do not define _XCR_XFEATURE_ENABLED_MASK.
+CMT_INLINE u32 get_xcr0()
+{
+#ifndef _XCR_XFEATURE_ENABLED_MASK
+    return 0;
+#else
+    const unsigned long long xcr = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+    return static_cast<u32>(xcr);
+#endif
+}
#endif
template <size_t = 0>
diff --git a/include/kfr/base/digitreverse.hpp b/include/kfr/base/digitreverse.hpp
@@ -48,13 +48,14 @@ constexpr inline u32 bit_permute_step_simple(u32 x, u32 m, u32 shift)
return ((x & m) << shift) | ((x >> shift) & m);
}
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshift-count-overflow"
-#pragma GCC diagnostic ignored "-Wshift-count-negative"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshift-count-overflow")
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshift-count-negative")
template <size_t radix, size_t bits>
constexpr enable_if<radix == 4, u32> digitreverse(u32 x)
{
+#ifdef CMT_COMPILER_GNU
if (bits <= 2)
return x;
if (bits <= 4)
@@ -84,9 +85,16 @@ constexpr enable_if<radix == 4, u32> digitreverse(u32 x)
return x >> (32 - bits);
}
return x;
+#else
+ x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits
+ x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits
+ x = bit_permute_step_simple(x, 0x00ff00ff, 8); // Bit index complement 3 regroups 16 bits
+ x = bit_permute_step_simple(x, 0x0000ffff, 16); // Bit index complement 4 regroups 32 bits
+ return x >> (32 - bits);
+#endif
}
-#pragma GCC diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
template <size_t radix, size_t bits>
struct shuffle_index_digitreverse
diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp
@@ -31,8 +31,8 @@
#include <tuple>
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wshadow"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
namespace kfr
{
@@ -399,9 +399,9 @@ CMT_INLINE size_t process(OutputExpr&& out, const InputExpr& in, size_t start =
in.begin_block(cinput, size);
#ifdef NDEBUG
- constexpr size_t w = width == 0 ? internal::get_vector_width<Tin, c>(2, 4) : width;
+ constexpr size_t w = width == 0 ? platform<Tin, c>::vector_width * bitness_const(2, 4) : width;
#else
- constexpr size_t w = width == 0 ? internal::get_vector_width<Tin, c>(1, 1) : width;
+ constexpr size_t w = width == 0 ? platform<Tin, c>::vector_width * bitness_const(1, 1) : width;
#endif
size_t i = start;
@@ -442,4 +442,5 @@ struct output_expression_base : output_expression
}
};
}
-#pragma clang diagnostic pop
+
+CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/base/function.hpp b/include/kfr/base/function.hpp
@@ -30,8 +30,8 @@
#include "types.hpp"
#include "vec.hpp"
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wshadow"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
namespace kfr
{
@@ -127,10 +127,11 @@ template <cpu_t c, typename T>
constexpr inline size_t next_simd_width(size_t n)
{
#ifdef CMT_ARCH_X86
- return n > vector_width<T, cpu_t::sse2> ? vector_width<T, c> : vector_width<T, cpu_t::sse2>;
+ return n > platform<T, cpu_t::sse2>::vector_width ? platform<T, c>::vector_width
+ : platform<T, cpu_t::sse2>::vector_width;
#endif
#ifdef CMT_ARCH_ARM
- return vector_width<T, cpu_t::neon>;
+ return platform<T, cpu_t::neon>::vector_width;
#endif
}
@@ -147,116 +148,112 @@ KFR_SINTRIN vec<T, Nout> expand_simd(const vec<T, N>& x, identity<T> value)
}
#define KFR_HANDLE_ALL_SIZES_1(fn) \
- template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return slice<0, N>(fn(expand_simd(a))); \
} \
- template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return concat(fn(low(a)), fn(high(a))); \
}
#define KFR_HANDLE_ALL_SIZES_FLT_1(fn) \
- template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)> \
KFR_SINTRIN vec<flt_type<T>, N> fn(const vec<T, N>& a) \
{ \
return slice<0, N>(fn(expand_simd(cast<flt_type<T>>(a)))); \
} \
- template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void> \
KFR_SINTRIN vec<flt_type<T>, N> fn(const vec<T, N>& a) \
{ \
return concat(fn(low(cast<flt_type<T>>(a))), fn(high(cast<flt_type<T>>(a)))); \
}
#define KFR_HANDLE_ALL_SIZES_F_1(fn) \
- template <typename T, size_t N, \
- KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && is_f_class<T>::value)> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width && is_f_class<T>::value)> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return slice<0, N>(fn(expand_simd(a))); \
} \
- template <typename T, size_t N, \
- KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && is_f_class<T>::value), typename = void> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width && is_f_class<T>::value), \
+ typename = void> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return concat(fn(low(a)), fn(high(a))); \
}
#define KFR_HANDLE_ALL_SIZES_I_1(fn) \
- template <typename T, size_t N, \
- KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && is_i_class<T>::value)> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width && is_i_class<T>::value)> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return slice<0, N>(fn(expand_simd(a))); \
} \
- template <typename T, size_t N, \
- KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && is_i_class<T>::value), typename = void> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width && is_i_class<T>::value), \
+ typename = void> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return concat(fn(low(a)), fn(high(a))); \
}
#define KFR_HANDLE_ALL_SIZES_U_1(fn) \
- template <typename T, size_t N, \
- KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && is_u_class<T>::value)> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width && is_u_class<T>::value)> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return slice<0, N>(fn(expand_simd(a))); \
} \
- template <typename T, size_t N, \
- KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && is_u_class<T>::value), typename = void> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width && is_u_class<T>::value), \
+ typename = void> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return concat(fn(low(a)), fn(high(a))); \
}
#define KFR_HANDLE_ALL_SIZES_NOT_F_1(fn) \
- template <typename T, size_t N, \
- KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && !is_f_class<T>::value)> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width && !is_f_class<T>::value)> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return slice<0, N>(fn(expand_simd(a))); \
} \
- template <typename T, size_t N, \
- KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && !is_f_class<T>::value), typename = void> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width && !is_f_class<T>::value), \
+ typename = void> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a) \
{ \
return concat(fn(low(a)), fn(high(a))); \
}
#define KFR_HANDLE_ALL_SIZES_2(fn) \
- template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a, const vec<T, N>& b) \
{ \
return slice<0, N>(fn(expand_simd(a), expand_simd(b))); \
} \
- template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a, const vec<T, N>& b) \
{ \
return concat(fn(low(a), low(b)), fn(high(a), high(b))); \
}
#define KFR_HANDLE_ALL_SIZES_3(fn) \
- template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a, const vec<T, N>& b, const vec<T, N>& c) \
{ \
return slice<0, N>(fn(expand_simd(a), expand_simd(b), expand_simd(c))); \
} \
- template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a, const vec<T, N>& b, const vec<T, N>& c) \
{ \
return concat(fn(low(a), low(b), low(c)), fn(high(a), high(b), high(c))); \
}
#define KFR_HANDLE_ALL_SIZES_4(fn) \
- template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a, const vec<T, N>& b, const vec<T, N>& c, const vec<T, N>& d) \
{ \
return slice<0, N>(fn(expand_simd(a), expand_simd(b), expand_simd(c), expand_simd(d))); \
} \
- template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void> \
KFR_SINTRIN vec<T, N> fn(const vec<T, N>& a, const vec<T, N>& b, const vec<T, N>& c, const vec<T, N>& d) \
{ \
return concat(fn(low(a), low(b), low(c), low(d)), fn(high(a), high(b), high(c), high(d))); \
@@ -277,4 +274,4 @@ inline T to_scalar(const vec<T, 1>& value)
}
}
}
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/base/gamma.hpp b/include/kfr/base/gamma.hpp
@@ -27,9 +27,9 @@
#include "function.hpp"
#include "log_exp.hpp"
-#pragma clang diagnostic push
+CMT_PRAGMA_GNU(GCC diagnostic push)
#if CMT_HAS_WARNING("-Wc99-extensions")
-#pragma clang diagnostic ignored "-Wc99-extensions"
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wc99-extensions")
#endif
namespace kfr
@@ -92,4 +92,4 @@ KFR_INTRIN internal::expression_function<fn::factorial_approx, E1> factorial_app
}
}
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/base/generators.hpp b/include/kfr/base/generators.hpp
@@ -58,7 +58,7 @@ protected:
template <size_t N>
void call_shift(csize_t<N>) const
{
- ptr_cast<Class>(this)->shift(csize<N>);
+ ptr_cast<Class>(this)->shift(csize_t<N>());
}
template <size_t N>
@@ -81,7 +81,7 @@ protected:
CMT_INLINE vec<T, N> generate(vec_t<T, N>) const
{
const vec<T, N> result = narrow<N>(value);
- shift(csize<N>);
+ shift(csize_t<N>());
return result;
}
@@ -96,7 +96,7 @@ protected:
mutable vec<T, width> value;
};
-template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)>
+template <typename T, size_t width = platform<T>::vector_width* bitness_const(1, 2)>
struct generator_linear : generator<T, width, generator_linear<T, width>>
{
constexpr generator_linear(T start, T step) noexcept : step(step), vstep(step* width)
@@ -113,7 +113,7 @@ protected:
T vstep;
};
-template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP>
+template <typename T, size_t width = platform<T>::vector_width* bitness_const(1, 2), KFR_ARCH_DEP>
struct generator_exp : generator<T, width, generator_exp<T, width>>
{
generator_exp(T start, T step) noexcept : step(step), vstep(exp(make_vector(step* width))[0] - 1)
@@ -130,7 +130,7 @@ protected:
T vstep;
};
-template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP>
+template <typename T, size_t width = platform<T>::vector_width* bitness_const(1, 2), KFR_ARCH_DEP>
struct generator_exp2 : generator<T, width, generator_exp2<T, width>>
{
generator_exp2(T start, T step) noexcept : step(step), vstep(exp2(make_vector(step* width))[0] - 1)
@@ -147,7 +147,7 @@ protected:
T vstep;
};
-template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP>
+template <typename T, size_t width = platform<T>::vector_width* bitness_const(1, 2), KFR_ARCH_DEP>
struct generator_cossin : generator<T, width, generator_cossin<T, width>>
{
generator_cossin(T start, T step)
@@ -172,7 +172,7 @@ protected:
}
};
-template <typename T, size_t width = get_vector_width<T, cpu_t::native>(2, 4), KFR_ARCH_DEP>
+template <typename T, size_t width = platform<T>::vector_width* bitness_const(2, 4), KFR_ARCH_DEP>
struct generator_sin : generator<T, width, generator_sin<T, width>>
{
generator_sin(T start, T step)
diff --git a/include/kfr/base/kfr.h b/include/kfr/base/kfr.h
@@ -14,6 +14,14 @@
#define KFR_VERSION_BUILD 0
#define KFR_VERSION (KFR_VERSION_MAJOR * 10000 + KFR_VERSION_MINOR * 100 + KFR_VERSION_BUILD)
+#ifdef CMT_ARCH_X64
+#define KFR_VERSION_FULL \
+ "KFR " KFR_VERSION_STRING " " CMT_STRINGIFY(CMT_ARCH_NAME) " 64-bit (" CMT_COMPIER_NAME ")"
+#else
+#define KFR_VERSION_FULL \
+ "KFR " KFR_VERSION_STRING " " CMT_STRINGIFY(CMT_ARCH_NAME) " 32-bit (" CMT_COMPIER_NAME ")"
+#endif
+
#ifdef __cplusplus
namespace kfr
{
@@ -22,14 +30,9 @@ constexpr int version_major = KFR_VERSION_MAJOR;
constexpr int version_minor = KFR_VERSION_MINOR;
constexpr int version_build = KFR_VERSION_BUILD;
constexpr int version = KFR_VERSION;
+constexpr const char version_full[] = KFR_VERSION_FULL;
}
#endif
-#ifdef CMT_ARCH_X64
-#define KFR_VERSION_FULL "KFR " KFR_VERSION_STRING " " CMT_STRINGIFY(CMT_ARCH_NAME) " 64-bit"
-#else
-#define KFR_VERSION_FULL "KFR " KFR_VERSION_STRING " " CMT_STRINGIFY(CMT_ARCH_NAME) " 32-bit"
-#endif
-
#define KFR_INTRIN CMT_INTRIN
#define KFR_SINTRIN CMT_INTRIN static
diff --git a/include/kfr/base/log_exp.hpp b/include/kfr/base/log_exp.hpp
@@ -96,7 +96,7 @@ KFR_SINTRIN vec<f32, N> log(const vec<f32, N>& d)
vec<f32, N> x = (m - 1.0f) / (m + 1.0f);
vec<f32, N> x2 = x * x;
- vec<f32, N> sp = select(d < 0, c_qnan<f32>, c_neginfinity<f32>);
+ vec<f32, N> sp = select(d < 0, constants<f32>::qnan, constants<f32>::neginfinity);
vec<f32, N> t = 0.2371599674224853515625f;
t = fmadd(t, x2, 0.285279005765914916992188f);
@@ -119,7 +119,7 @@ KFR_SINTRIN vec<f64, N> log(const vec<f64, N>& d)
vec<f64, N> x = (m - 1.0) / (m + 1.0);
vec<f64, N> x2 = x * x;
- vec<f64, N> sp = select(d < 0, c_qnan<f64>, c_neginfinity<f64>);
+ vec<f64, N> sp = select(d < 0, constants<f64>::qnan, constants<f64>::neginfinity);
vec<f64, N> t = 0.148197055177935105296783;
t = fmadd(t, x2, 0.153108178020442575739679);
@@ -130,7 +130,7 @@ KFR_SINTRIN vec<f64, N> log(const vec<f64, N>& d)
t = fmadd(t, x2, 0.666666666666685503450651);
t = fmadd(t, x2, 2);
- x = x * t + c_log_2<f64> * cast<f64>(e);
+ x = x * t + constants<f64>::log_2 * cast<f64>(e);
x = select(d > 0, x, sp);
return x;
@@ -139,12 +139,12 @@ KFR_SINTRIN vec<f64, N> log(const vec<f64, N>& d)
template <typename T, size_t N, typename Tout = flt_type<T>>
KFR_SINTRIN vec<Tout, N> log2(const vec<T, N>& x)
{
- return log(cast<Tout>(x)) * c_recip_log_2<Tout>;
+ return log(cast<Tout>(x)) * constants<Tout>::recip_log_2;
}
template <typename T, size_t N, typename Tout = flt_type<T>>
KFR_SINTRIN vec<Tout, N> log10(const vec<T, N>& x)
{
- return log(cast<Tout>(x)) * c_recip_log_10<Tout>;
+ return log(cast<Tout>(x)) * constants<Tout>::recip_log_10;
}
template <size_t N>
@@ -153,7 +153,7 @@ KFR_SINTRIN vec<f32, N> exp(const vec<f32, N>& d)
const f32 ln2_part1 = 0.6931457519f;
const f32 ln2_part2 = 1.4286067653e-6f;
- vec<i32, N> q = cast<i32>(floor(d * c_recip_log_2<f32>));
+ vec<i32, N> q = cast<i32>(floor(d * constants<f32>::recip_log_2));
vec<f32, N> s, u;
s = fmadd(cast<f32>(q), -ln2_part1, d);
@@ -176,7 +176,7 @@ KFR_SINTRIN vec<f32, N> exp(const vec<f32, N>& d)
u = s * s * u + s + 1.0f;
u = vldexpk(u, q);
- u = select(d == c_neginfinity<f32>, 0.f, u);
+ u = select(d == constants<f32>::neginfinity, 0.f, u);
return u;
}
@@ -187,7 +187,7 @@ KFR_SINTRIN vec<f64, N> exp(const vec<f64, N>& d)
const f64 ln2_part1 = 0.69314717501401901245;
const f64 ln2_part2 = 5.545926273775592108e-009;
- vec<i64, N> q = cast<i64>(floor(d * c_recip_log_2<f64>));
+ vec<i64, N> q = cast<i64>(floor(d * +constants<f64>::recip_log_2));
vec<f64, N> s, u;
s = fmadd(cast<f64>(q), -ln2_part1, d);
@@ -218,19 +218,19 @@ KFR_SINTRIN vec<f64, N> exp(const vec<f64, N>& d)
u = s * s * u + s + 1.0;
u = vldexpk(u, q);
- u = select(d == c_neginfinity<f64>, 0.0, u);
+ u = select(d == constants<f64>::neginfinity, 0.0, u);
return u;
}
template <typename T, size_t N, typename Tout = flt_type<T>>
KFR_SINTRIN vec<Tout, N> exp2(const vec<T, N>& x)
{
- return exp(x * c_log_2<Tout>);
+ return exp(x * constants<Tout>::log_2);
}
template <typename T, size_t N, typename Tout = flt_type<T>>
KFR_SINTRIN vec<Tout, N> exp10(const vec<T, N>& x)
{
- return exp(x * c_log_10<Tout>);
+ return exp(x * constants<Tout>::log_10);
}
template <typename T, size_t N>
@@ -239,8 +239,8 @@ KFR_SINTRIN vec<T, N> pow(const vec<T, N>& a, const vec<T, N>& b)
const vec<T, N> t = exp(b * log(abs(a)));
const mask<T, N> isint = floor(b) == b;
const mask<T, N> iseven = (cast<itype<T>>(b) & 1) == 0;
- return select(a > T(), t,
- select(a == T(), T(1), select(isint, select(iseven, t, -t), broadcast<N>(c_qnan<T>))));
+ return select(a > T(), t, select(a == T(), T(1),
+ select(isint, select(iseven, t, -t), broadcast<N>(constants<T>::qnan))));
}
template <typename T, size_t N>
diff --git a/include/kfr/base/logical.hpp b/include/kfr/base/logical.hpp
@@ -139,23 +139,23 @@ KFR_SINTRIN bool bittestall(const i32sse& x) { return !_mm_movemask_epi8(*~x); }
KFR_SINTRIN bool bittestall(const i64sse& x) { return !_mm_movemask_epi8(*~x); }
#endif
-template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)>
+template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)>
KFR_SINTRIN bool bittestall(const vec<T, N>& a)
{
return bittestall(expand_simd(a, internal::maskbits<T>(true)));
}
-template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void>
+template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void>
KFR_SINTRIN bool bittestall(const vec<T, N>& a)
{
return bittestall(low(a)) && bittestall(high(a));
}
-template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)>
+template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)>
KFR_SINTRIN bool bittestany(const vec<T, N>& a)
{
return bittestany(expand_simd(a, internal::maskbits<T>(false)));
}
-template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void>
+template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void>
KFR_SINTRIN bool bittestany(const vec<T, N>& a)
{
return bittestany(low(a)) || bittestany(high(a));
@@ -192,23 +192,23 @@ KFR_SINTRIN bool bittestall(const i64neon& a) { return bittestall(bitcast<u32>(a
KFR_SINTRIN bool bittestall(const f32neon& a) { return bittestall(bitcast<u32>(a)); }
KFR_SINTRIN bool bittestall(const f64neon& a) { return bittestall(bitcast<u32>(a)); }
-template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)>
+template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)>
KFR_SINTRIN bool bittestall(const vec<T, N>& a)
{
return bittestall(expand_simd(a, internal::maskbits<T>(true)));
}
-template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void>
+template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void>
KFR_SINTRIN bool bittestall(const vec<T, N>& a)
{
return bittestall(low(a)) && bittestall(high(a));
}
-template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)>
+template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)>
KFR_SINTRIN bool bittestany(const vec<T, N>& a)
{
return bittestany(expand_simd(a, internal::maskbits<T>(false)));
}
-template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void>
+template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void>
KFR_SINTRIN bool bittestany(const vec<T, N>& a)
{
return bittestany(low(a)) || bittestany(high(a));
diff --git a/include/kfr/base/memory.hpp b/include/kfr/base/memory.hpp
@@ -57,7 +57,11 @@ struct mem_header
u8 reserved1;
u8 reserved2;
size_t size;
-} __attribute__((__packed__));
+}
+#ifdef CMT_GNU_ATTRIBUTES
+__attribute__((__packed__))
+#endif
+;
inline mem_header* aligned_header(void* ptr) { return ptr_cast<mem_header>(ptr) - 1; }
@@ -85,11 +89,12 @@ inline void aligned_free(void* ptr)
}
}
-template <typename T = void, size_t alignment = native_cache_alignment>
+template <typename T = void, size_t alignment = platform<>::native_cache_alignment>
CMT_INLINE T* aligned_allocate(size_t size = 1)
{
T* ptr = static_cast<T*>(CMT_ASSUME_ALIGNED(
- internal::aligned_malloc(std::max(alignment, size * details::elementsize<T>), alignment), alignment));
+ internal::aligned_malloc(std::max(alignment, size * details::elementsize<T>()), alignment),
+ alignment));
return ptr;
}
diff --git a/include/kfr/base/modzerobessel.hpp b/include/kfr/base/modzerobessel.hpp
@@ -27,9 +27,9 @@
#include "function.hpp"
#include "log_exp.hpp"
-#pragma clang diagnostic push
+CMT_PRAGMA_GNU(GCC diagnostic push)
#if CMT_HAS_WARNING("-Wc99-extensions")
-#pragma clang diagnostic ignored "-Wc99-extensions"
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wc99-extensions")
#endif
namespace kfr
@@ -38,50 +38,49 @@ namespace kfr
namespace intrinsics
{
-template <typename T>
-constexpr T bessel_coef[] = { T(0.25),
- T(0.027777777777777776236),
- T(0.0017361111111111110147),
- T(6.9444444444444444384e-005),
- T(1.9290123456790123911e-006),
- T(3.9367598891408417495e-008),
- T(6.1511873267825652335e-010),
- T(7.5940584281266239246e-012),
- T(7.5940584281266233693e-014),
- T(6.2760813455591932909e-016),
- T(4.3583898233049949985e-018),
- T(2.5789288895295827557e-020),
- T(1.3157800456783586208e-022),
- T(5.8479113141260384983e-025),
- T(2.2843403570804837884e-027),
- T(7.904291893012054025e-030),
- T(2.4395962632753252792e-032),
- T(6.75788438580422547e-035),
- T(1.689471096451056426e-037),
- T(3.8310002187098784929e-040),
- T(7.9152897080782616517e-043),
- T(1.4962740468957016443e-045),
- T(2.5976979980828152196e-048),
- T(4.1563167969325041577e-051),
- T(6.1483976285983795968e-054),
- T(8.434015951438105991e-057),
- T(1.0757673407446563809e-059),
- T(1.2791526049282476926e-062),
- T(1.4212806721424974034e-065),
- T(1.4789601166935457918e-068),
- T(1.4442969889585408123e-071),
- T(1.3262598613026086927e-074),
- T(1.1472836170437790782e-077),
- T(9.3655805472961564331e-081),
- T(7.2265282000741942594e-084),
- T(5.2786911614858977913e-087),
- T(3.6556032974279072401e-090),
- T(2.4034209713529963119e-093),
- T(1.5021381070956226783e-096) };
-
template <typename T, size_t N>
CMT_INLINE vec<T, N> modzerobessel(const vec<T, N>& x)
{
+ constexpr static T bessel_coef[] = { T(0.25),
+ T(0.027777777777777776236),
+ T(0.0017361111111111110147),
+ T(6.9444444444444444384e-005),
+ T(1.9290123456790123911e-006),
+ T(3.9367598891408417495e-008),
+ T(6.1511873267825652335e-010),
+ T(7.5940584281266239246e-012),
+ T(7.5940584281266233693e-014),
+ T(6.2760813455591932909e-016),
+ T(4.3583898233049949985e-018),
+ T(2.5789288895295827557e-020),
+ T(1.3157800456783586208e-022),
+ T(5.8479113141260384983e-025),
+ T(2.2843403570804837884e-027),
+ T(7.904291893012054025e-030),
+ T(2.4395962632753252792e-032),
+ T(6.75788438580422547e-035),
+ T(1.689471096451056426e-037),
+ T(3.8310002187098784929e-040),
+ T(7.9152897080782616517e-043),
+ T(1.4962740468957016443e-045),
+ T(2.5976979980828152196e-048),
+ T(4.1563167969325041577e-051),
+ T(6.1483976285983795968e-054),
+ T(8.434015951438105991e-057),
+ T(1.0757673407446563809e-059),
+ T(1.2791526049282476926e-062),
+ T(1.4212806721424974034e-065),
+ T(1.4789601166935457918e-068),
+ T(1.4442969889585408123e-071),
+ T(1.3262598613026086927e-074),
+ T(1.1472836170437790782e-077),
+ T(9.3655805472961564331e-081),
+ T(7.2265282000741942594e-084),
+ T(5.2786911614858977913e-087),
+ T(3.6556032974279072401e-090),
+ T(2.4034209713529963119e-093),
+ T(1.5021381070956226783e-096) };
+
const vec<T, N> x_2 = x * 0.5;
const vec<T, N> x_2_sqr = x_2 * x_2;
vec<T, N> num = x_2_sqr;
@@ -91,7 +90,7 @@ CMT_INLINE vec<T, N> modzerobessel(const vec<T, N>& x)
CMT_LOOP_UNROLL
for (size_t i = 0; i < (sizeof(T) == 4 ? 20 : 39); i++)
{
- result = fmadd((num *= x_2_sqr), bessel_coef<T>[i], result);
+ result = fmadd((num *= x_2_sqr), bessel_coef[i], result);
}
return result;
}
@@ -113,4 +112,4 @@ KFR_INTRIN internal::expression_function<fn::modzerobessel, E1> modzerobessel(E1
}
}
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/base/operators.hpp b/include/kfr/base/operators.hpp
@@ -83,7 +83,7 @@ KFR_FN(add)
/**
* @brief Returns template expression that returns sum of all the arguments passed to a function.
*/
-template <typename... E, KFR_ENABLE_IF(is_input_expressions<E...>::value)>
+template <typename... E, KFR_ENABLE_IF((is_input_expressions<E...>::value) && true)>
CMT_INLINE internal::expression_function<fn::add, E...> add(E&&... x)
{
return { fn::add(), std::forward<E>(x)... };
@@ -310,7 +310,7 @@ inline common_type<T1, T2> bitwiseand(const T1& x, const T2& y)
template <typename T>
constexpr inline T bitwiseand(initialvalue<T>)
{
- return internal::allones<subtype<T>>;
+ return constants<T>::allones();
}
KFR_FN(bitwiseand)
@@ -323,7 +323,7 @@ inline common_type<T1, T2> bitwiseandnot(const T1& x, const T2& y)
template <typename T>
constexpr inline T bitwiseandnot(initialvalue<T>)
{
- return internal::allones<subtype<T>>;
+ return constants<T>::allones();
}
KFR_FN(bitwiseandnot)
@@ -336,7 +336,7 @@ inline common_type<T1, T2> bitwiseor(const T1& x, const T2& y)
template <typename T>
constexpr inline T bitwiseor(initialvalue<T>)
{
- return subtype<T>();
+ return subtype<T>(0);
}
KFR_FN(bitwiseor)
@@ -512,14 +512,14 @@ KFR_FN(reciprocal)
template <typename T1, typename T2>
CMT_INLINE common_type<T1, T2> mulsign(const T1& x, const T2& y)
{
- return x ^ (y & internal::highbitmask<subtype<T2>>);
+ return x ^ (y & constants<T2>::highbitmask());
}
KFR_FN(mulsign)
template <typename T, size_t N>
constexpr CMT_INLINE vec<T, N> copysign(const vec<T, N>& x, const vec<T, N>& y)
{
- return (x & internal::highbitmask<T>) | (y & internal::highbitmask<T>);
+ return (x & constants<T>::highbitmask()) | (y & constants<T>::highbitmask());
}
template <typename T, size_t N>
@@ -531,7 +531,7 @@ CMT_INLINE mask<T, N> isnan(const vec<T, N>& x)
template <typename T, size_t N>
CMT_INLINE mask<T, N> isinf(const vec<T, N>& x)
{
- return x == c_infinity<T> || x == -c_infinity<T>;
+ return x == constants<T>::infinity || x == -constants<T>::infinity;
}
template <typename T, size_t N>
@@ -543,7 +543,7 @@ CMT_INLINE mask<T, N> isfinite(const vec<T, N>& x)
template <typename T, size_t N>
CMT_INLINE mask<T, N> isnegative(const vec<T, N>& x)
{
- return (x & internal::highbitmask<T>) != 0;
+ return (x & constants<T>::highbitmask()) != 0;
}
template <typename T, size_t N>
diff --git a/include/kfr/base/platform.hpp b/include/kfr/base/platform.hpp
@@ -93,94 +93,81 @@ CMT_UNUSED static const char* cpu_name(cpu_t set)
return "-";
}
+#ifdef CMT_ARCH_X64
+template <int = 0>
+constexpr inline const char* bitness_const(const char*, const char* x64)
+{
+ return x64;
+}
template <typename T>
-constexpr inline const T& bitness_const(const T& x32, const T& x64)
+constexpr inline const T& bitness_const(const T&, const T& x64)
{
-#ifdef CMT_ARCH_X64
- (void)x32;
return x64;
+}
#else
- (void)x64;
+template <int = 0>
+constexpr inline const char* bitness_const(const char* x32, const char*)
+{
+ return x32;
+}
+template <typename T>
+constexpr inline const T& bitness_const(const T& x32, const T&)
+{
return x32;
-#endif
}
-
-#ifdef CMT_ARCH_X64
-constexpr inline const char* bitness_const(const char*, const char* x64) { return x64; }
-#else
-constexpr inline const char* bitness_const(const char* x32, const char*) { return x32; }
#endif
-constexpr size_t native_cache_alignment = 64;
-constexpr size_t native_cache_alignment_mask = native_cache_alignment - 1;
-constexpr size_t maximum_vector_alignment = 32;
-constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1;
-constexpr size_t native_register_count = bitness_const(8, 16);
+template <typename T = i32, cpu_t c = cpu_t::native>
+struct platform
+{
+ constexpr static size_t native_cache_alignment = 64;
+ constexpr static size_t native_cache_alignment_mask = native_cache_alignment - 1;
+ constexpr static size_t maximum_vector_alignment = 32;
+ constexpr static size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1;
+#ifdef CMT_ARCH_X86
+ constexpr static size_t simd_register_count = bitness_const(8, 16);
+#endif
+#ifdef CMT_ARCH_ARM
+ constexpr static size_t simd_register_count = 16;
+#endif
-constexpr size_t common_float_vector_size = 16;
-constexpr size_t common_int_vector_size = 16;
+ constexpr static size_t common_float_vector_size = 16;
+ constexpr static size_t common_int_vector_size = 16;
-template <cpu_t c>
-constexpr size_t native_float_vector_size =
#ifdef CMT_ARCH_X86
- c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size;
+ constexpr static size_t native_float_vector_size =
+ c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size;
#endif
#ifdef CMT_ARCH_ARM
-c == cpu_t::neon ? 16 : common_float_vector_size;
+ constexpr static size_t native_float_vector_size = c == cpu_t::neon ? 16 : common_float_vector_size;
#endif
-template <cpu_t c>
-constexpr size_t native_int_vector_size =
#ifdef CMT_ARCH_X86
- c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size;
+ constexpr static size_t native_int_vector_size =
+ c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size;
#endif
#ifdef CMT_ARCH_ARM
-c == cpu_t::neon ? 16 : common_int_vector_size;
+ constexpr static size_t native_int_vector_size = c == cpu_t::neon ? 16 : common_int_vector_size;
#endif
-/// @brief SIMD vector width for the given cpu instruction set
-template <typename T, cpu_t c = cpu_t::native>
-constexpr size_t vector_width = const_max(size_t(1), typeclass<T> == datatype::f
- ? native_float_vector_size<c> / sizeof(T)
- : native_int_vector_size<c> / sizeof(T));
+ /// @brief SIMD vector width for the given cpu instruction set
+ constexpr static size_t vector_width =
+ (const_max(size_t(1), typeclass<T> == datatype::f ? native_float_vector_size / sizeof(T)
+ : native_int_vector_size / sizeof(T)));
-template <cpu_t c>
-constexpr size_t vector_width<void, c> = 0;
+ constexpr static size_t vector_capacity = simd_register_count * vector_width;
-namespace internal
-{
+ constexpr static size_t maximum_vector_size = const_min(static_cast<size_t>(32), vector_capacity / 4);
-template <cpu_t c>
-constexpr size_t native_vector_alignment = const_max(native_float_vector_size<c>, native_int_vector_size<c>);
+ constexpr static size_t native_vector_alignment =
+ const_max(native_float_vector_size, native_int_vector_size);
-template <cpu_t c>
-constexpr bool fast_unaligned =
+ constexpr static bool fast_unaligned =
#ifdef CMT_ARCH_X86
- c >= cpu_t::avx1;
+ c >= cpu_t::avx1;
#else
- false;
+ false;
#endif
-template <cpu_t c>
-constexpr size_t native_vector_alignment_mask = native_vector_alignment<c> - 1;
-
-template <typename T, cpu_t c>
-constexpr inline size_t get_vector_width(size_t scale = 1)
-{
- return scale * vector_width<T, c>;
-}
-template <typename T, cpu_t c>
-constexpr inline size_t get_vector_width(size_t x32scale, size_t x64scale)
-{
- return bitness_const(x32scale, x64scale) * vector_width<T, c>;
-}
-
-template <typename T, cpu_t c>
-constexpr auto vector_width_range = csize<1> << csizeseq<ilog2(vector_width<T, c>) + 1>;
-
-template <typename T, cpu_t c>
-constexpr size_t vector_capacity = native_register_count* vector_width<T, c>;
-
-template <typename T, cpu_t c>
-constexpr size_t maximum_vector_size = const_min(static_cast<size_t>(32), vector_capacity<T, c> / 4);
-}
+ constexpr static size_t native_vector_alignment_mask = native_vector_alignment - 1;
+};
}
diff --git a/include/kfr/base/pointer.hpp b/include/kfr/base/pointer.hpp
@@ -34,8 +34,10 @@ namespace kfr
constexpr size_t maximum_expression_width = bitness_const(16, 32);
+constexpr size_t expression_vtable_size = 2 + ilog2(maximum_expression_width) + 1;
+
template <typename T>
-using expression_vtable = std::array<void*, 2 + ilog2(maximum_expression_width) + 1>;
+using expression_vtable = std::array<void*, expression_vtable_size>;
struct dummy_content
{
@@ -172,12 +174,11 @@ template <typename T, typename E>
expression_vtable<T> make_expression_vtable_impl()
{
expression_vtable<T> result;
- constexpr size_t size = result.size() - 2;
result[0] = reinterpret_cast<void*>(internal::make_expression_begin_block<decay<E>>());
result[1] = reinterpret_cast<void*>(internal::make_expression_end_block<decay<E>>());
- cforeach(csizeseq<size>, [&](auto u) {
+ cforeach(csizeseq_t<expression_vtable_size - 2>(), [&](auto u) {
constexpr size_t N = 1 << val_of(decltype(u)());
result[2 + val_of(decltype(u)())] =
reinterpret_cast<void*>(internal::make_expression_func<T, N, decay<E>>());
diff --git a/include/kfr/base/random.hpp b/include/kfr/base/random.hpp
@@ -40,27 +40,33 @@ struct seed_from_rdtsc_t
constexpr seed_from_rdtsc_t seed_from_rdtsc{};
+#ifdef CMT_COMPILER_CLANG
+#define KFR_builtin_readcyclecounter() __builtin_readcyclecounter()
+#else
+#define KFR_builtin_readcyclecounter() __rdtsc()
+#endif
+
struct random_bit_generator
{
random_bit_generator(seed_from_rdtsc_t) noexcept
- : state(bitcast<u32>(make_vector(__builtin_readcyclecounter(),
- (__builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull)))
+ : state(bitcast<u32>(make_vector(KFR_builtin_readcyclecounter(),
+ (KFR_builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull)))
{
(void)operator()();
}
- constexpr random_bit_generator(u32 x0, u32 x1, u32 x2, u32 x3) noexcept : state(x0, x1, x2, x3)
+ random_bit_generator(u32 x0, u32 x1, u32 x2, u32 x3) noexcept : state(x0, x1, x2, x3)
{
(void)operator()();
}
- constexpr random_bit_generator(u64 x0, u64 x1) noexcept : state(bitcast<u32>(make_vector(x0, x1)))
+ random_bit_generator(u64 x0, u64 x1) noexcept : state(bitcast<u32>(make_vector(x0, x1)))
{
(void)operator()();
}
inline random_state operator()()
{
- constexpr static random_state mul{ 214013u, 17405u, 214013u, 69069u };
- constexpr static random_state add{ 2531011u, 10395331u, 13737667u, 1u };
+ CMT_GNU_CONSTEXPR static random_state mul{ 214013u, 17405u, 214013u, 69069u };
+ CMT_GNU_CONSTEXPR static random_state add{ 2531011u, 10395331u, 13737667u, 1u };
state = bitcast<u32>(rotateright<3>(bitcast<u8>(fmadd(state, mul, add))));
return state;
}
diff --git a/include/kfr/base/read_write.hpp b/include/kfr/base/read_write.hpp
@@ -85,19 +85,19 @@ CMT_INLINE vec<T, Nout> gather_stride_s(const T* base, size_t stride, csizes_t<I
template <typename T, size_t N>
CMT_INLINE vec<T, N> gather(const T* base, const vec<u32, N>& indices)
{
- return internal::gather(base, indices, csizeseq<N>);
+ return internal::gather(base, indices, csizeseq_t<N>());
}
template <size_t Nout, typename T>
CMT_INLINE vec<T, Nout> gather_stride(const T* base, size_t stride)
{
- return internal::gather_stride_s<Nout>(base, stride, csizeseq<Nout>);
+ return internal::gather_stride_s<Nout>(base, stride, csizeseq_t<Nout>());
}
template <size_t Nout, size_t Stride, typename T>
CMT_INLINE vec<T, Nout> gather_stride(const T* base)
{
- return internal::gather_stride<Nout, Stride>(base, csizeseq<Nout>);
+ return internal::gather_stride<Nout, Stride>(base, csizeseq_t<Nout>());
}
template <size_t groupsize, typename T, size_t N, typename IT, size_t... Indices>
@@ -108,7 +108,7 @@ CMT_INLINE vec<T, N * groupsize> gather_helper(const T* base, const vec<IT, N>&
template <size_t groupsize = 1, typename T, size_t N, typename IT>
CMT_INLINE vec<T, N * groupsize> gather(const T* base, const vec<IT, N>& offset)
{
- return gather_helper<groupsize>(base, offset, csizeseq<N>);
+ return gather_helper<groupsize>(base, offset, csizeseq_t<N>());
}
template <size_t groupsize, typename T, size_t N, size_t Nout = N* groupsize, typename IT, size_t... Indices>
@@ -121,42 +121,42 @@ CMT_INLINE void scatter_helper(T* base, const vec<IT, N>& offset, const vec<T, N
template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N* groupsize, typename IT>
CMT_INLINE void scatter(T* base, const vec<IT, N>& offset, const vec<T, Nout>& value)
{
- return scatter_helper<groupsize>(base, offset, value, csizeseq<N>);
+ return scatter_helper<groupsize>(base, offset, value, csizeseq_t<N>());
}
template <typename T>
-constexpr T partial_masks[] = { internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
- internal::allones<T>,
+constexpr T partial_masks[] = { constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
+ constants<T>::allones(),
T(),
T(),
T(),
diff --git a/include/kfr/base/reduce.hpp b/include/kfr/base/reduce.hpp
@@ -61,10 +61,10 @@ CMT_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t, T value)
return finalfn(value);
}
-template <typename T, typename ReduceFn, typename TransformFn, typename FinalFn, cpu_t cpu = cpu_t::native>
+template <typename T, typename ReduceFn, typename TransformFn, typename FinalFn, KFR_ARCH_DEP>
struct expression_reduce : output_expression
{
- constexpr static size_t width = vector_width<T, cpu> * bitness_const(1, 2);
+ constexpr static size_t width = platform<T>::vector_width * bitness_const(1, 2);
using value_type = T;
@@ -85,16 +85,16 @@ struct expression_reduce : output_expression
protected:
void reset() { counter = 0; }
- CMT_INLINE void process(vec<T, width> x) const { value = reducefn(transformfn(x), value); }
+ CMT_INLINE void process(const vec<T, width>& x) const { value = reducefn(transformfn(x), value); }
template <size_t N, KFR_ENABLE_IF(N < width)>
- CMT_INLINE void process(vec<T, N> x) const
+ CMT_INLINE void process(const vec<T, N>& x) const
{
value = combine(value, reducefn(transformfn(x), narrow<N>(value)));
}
template <size_t N, KFR_ENABLE_IF(N > width)>
- CMT_INLINE void process(vec<T, N> x) const
+ CMT_INLINE void process(const vec<T, N>& x) const
{
process(low(x));
process(high(x));
@@ -110,14 +110,14 @@ protected:
template <typename ReduceFn, typename TransformFn = fn::pass_through, typename FinalFn = fn::pass_through,
typename E1, typename T = value_type_of<E1>>
-KFR_SINTRIN T reduce(E1&& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn::pass_through(),
+KFR_SINTRIN T reduce(const E1& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn::pass_through(),
FinalFn&& finalfn = fn::pass_through())
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
using reducer_t = internal::expression_reduce<T, decay<ReduceFn>, decay<TransformFn>, decay<FinalFn>>;
reducer_t red(std::forward<ReduceFn>(reducefn), std::forward<TransformFn>(transformfn),
std::forward<FinalFn>(finalfn));
- process(red, std::forward<E1>(e1));
+ process(red, e1);
return red.get();
}
@@ -133,10 +133,10 @@ KFR_FN(reduce)
* \f]
*/
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T sum(E1&& x)
+KFR_SINTRIN T sum(const E1& x)
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
- return reduce(std::forward<E1>(x), fn::add());
+ return reduce(x, fn::add());
}
/**
@@ -148,10 +148,10 @@ KFR_SINTRIN T sum(E1&& x)
* \f]
*/
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T mean(E1&& x)
+KFR_SINTRIN T mean(const E1& x)
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
- return reduce(std::forward<E1>(x), fn::add(), fn::pass_through(), fn::final_mean());
+ return reduce(x, fn::add(), fn::pass_through(), fn::final_mean());
}
/**
@@ -160,10 +160,10 @@ KFR_SINTRIN T mean(E1&& x)
* x must have its size and type specified.
*/
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T minof(E1&& x)
+KFR_SINTRIN T minof(const E1& x)
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
- return reduce(std::forward<E1>(x), fn::min());
+ return reduce(x, fn::min());
}
/**
@@ -172,10 +172,10 @@ KFR_SINTRIN T minof(E1&& x)
* x must have its size and type specified.
*/
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T maxof(E1&& x)
+KFR_SINTRIN T maxof(const E1& x)
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
- return reduce(std::forward<E1>(x), fn::max());
+ return reduce(x, fn::max());
}
/**
@@ -184,10 +184,10 @@ KFR_SINTRIN T maxof(E1&& x)
* x must have its size and type specified.
*/
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T absminof(E1&& x)
+KFR_SINTRIN T absminof(const E1& x)
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
- return reduce(std::forward<E1>(x), fn::absmin());
+ return reduce(x, fn::absmin());
}
/**
@@ -196,10 +196,10 @@ KFR_SINTRIN T absminof(E1&& x)
* x must have its size and type specified.
*/
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T absmaxof(E1&& x)
+KFR_SINTRIN T absmaxof(const E1& x)
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
- return reduce(std::forward<E1>(x), fn::absmax());
+ return reduce(x, fn::absmax());
}
/**
@@ -229,10 +229,10 @@ KFR_SINTRIN T dotproduct(E1&& x, E2&& y)
\f]
*/
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T rms(E1&& x)
+KFR_SINTRIN T rms(const E1& x)
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
- return reduce(std::forward<E1>(x), fn::add(), fn::sqr(), fn::final_rootmean());
+ return reduce(x, fn::add(), fn::sqr(), fn::final_rootmean());
}
/**
@@ -244,10 +244,10 @@ KFR_SINTRIN T rms(E1&& x)
\f]
*/
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T sumsqr(E1&& x)
+KFR_SINTRIN T sumsqr(const E1& x)
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
- return reduce(std::forward<E1>(x), fn::add(), fn::sqr());
+ return reduce(x, fn::add(), fn::sqr());
}
/**
@@ -259,9 +259,9 @@ KFR_SINTRIN T sumsqr(E1&& x)
\f]
*/
template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
-KFR_SINTRIN T product(E1&& x)
+KFR_SINTRIN T product(const E1& x)
{
static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use slice())");
- return reduce(std::forward<E1>(x), fn::mul());
+ return reduce(x, fn::mul());
}
}
diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp
@@ -121,12 +121,12 @@ KFR_SINTRIN i64avx select(const mi64avx& m, const i64avx& x, const i64avx& y)
}
#endif
-template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)>
+template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)>
KFR_SINTRIN vec<T, N> select(const mask<T, N>& a, const vec<T, N>& b, const vec<T, N>& c)
{
return slice<0, N>(select(expand_simd(a).asmask(), expand_simd(b), expand_simd(c)));
}
-template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void>
+template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void>
KFR_SINTRIN vec<T, N> select(const mask<T, N>& a, const vec<T, N>& b, const vec<T, N>& c)
{
return concat(select(low(a).asmask(), low(b), low(c)), select(high(a).asmask(), high(b), high(c)));
@@ -178,12 +178,12 @@ KFR_SINTRIN f64neon select(const mf64neon& m, const f64neon& x, const f64neon& y
}
#endif
-template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)>
+template <typename T, size_t N, KFR_ENABLE_IF(N < platform<T>::vector_width)>
KFR_SINTRIN vec<T, N> select(const mask<T, N>& a, const vec<T, N>& b, const vec<T, N>& c)
{
return slice<0, N>(select(expand_simd(a).asmask(), expand_simd(b), expand_simd(c)));
}
-template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void>
+template <typename T, size_t N, KFR_ENABLE_IF(N >= platform<T>::vector_width), typename = void>
KFR_SINTRIN vec<T, N> select(const mask<T, N>& a, const vec<T, N>& b, const vec<T, N>& c)
{
return concat(select(low(a).asmask(), low(b), low(c)), select(high(a).asmask(), high(b), high(c)));
diff --git a/include/kfr/base/shuffle.hpp b/include/kfr/base/shuffle.hpp
@@ -40,7 +40,7 @@ namespace internal
template <size_t index, typename T>
constexpr CMT_INLINE T broadcast_get_nth()
{
- return c_qnan<T>;
+ return constants<T>::qnan;
}
template <size_t index, typename T, typename... Ts>
@@ -51,17 +51,17 @@ constexpr CMT_INLINE T broadcast_get_nth(T x, Ts... rest)
template <typename T, typename... Ts, size_t... indices, size_t Nin = 1 + sizeof...(Ts),
size_t Nout = sizeof...(indices)>
-CMT_INLINE constexpr vec<T, Nout> broadcast_helper(csizes_t<indices...>, T x, Ts... rest)
+CMT_GNU_CONSTEXPR CMT_INLINE vec<T, Nout> broadcast_helper(csizes_t<indices...>, T x, Ts... rest)
{
- simd<T, Nout> result{ broadcast_get_nth<indices % Nin>(x, rest...)... };
- return result;
+ using simd_t = simd<T, Nout>;
+ return KFR_SIMD_SET(simd_t, broadcast_get_nth<indices % Nin>(x, rest...)...);
}
}
template <size_t Nout, typename T, typename... Ts>
-constexpr CMT_INLINE vec<T, Nout> broadcast(T x, T y, Ts... rest)
+CMT_GNU_CONSTEXPR CMT_INLINE vec<T, Nout> broadcast(T x, T y, Ts... rest)
{
- return internal::broadcast_helper(csizeseq<Nout>, x, y, rest...);
+ return internal::broadcast_helper(csizeseq_t<Nout>(), x, y, rest...);
}
KFR_FN(broadcast)
@@ -215,10 +215,9 @@ struct shuffle_index_shuffle
constexpr static size_t indexcount = sizeof...(Indices);
template <size_t index>
- constexpr inline size_t operator()() const
+ constexpr inline size_t operator()(csize_t<index>) const
{
- constexpr int result = csizes_t<Indices...>::get(csize<index % indexcount>);
- return result + index / indexcount * indexcount;
+ return csizes_t<Indices...>::get(csize_t<index % indexcount>()) + index / indexcount * indexcount;
}
};
}
@@ -247,11 +246,9 @@ struct shuffle_index_permute
constexpr static size_t indexcount = sizeof...(Indices);
template <size_t index>
- constexpr inline size_t operator()() const
+ constexpr inline size_t operator()(csize_t<index>) const
{
- constexpr size_t result = csizes_t<Indices...>::get(csize<index % indexcount>);
- static_assert(result < size, "result < size");
- return result + index / indexcount * indexcount;
+ return csizes_t<Indices...>::get(csize_t<index % indexcount>()) + index / indexcount * indexcount;
}
};
}
@@ -341,9 +338,9 @@ struct shuffle_index_blend
constexpr static size_t indexcount = sizeof...(Indices);
template <size_t index>
- constexpr inline size_t operator()() const
+ constexpr inline size_t operator()(csize_t<index>) const
{
- return (elements_t<Indices...>::get(csize<index % indexcount>) ? size : 0) + index % size;
+ return (elements_t<Indices...>::get(csize_t<index % indexcount>()) ? size : 0) + index % size;
}
};
}
@@ -434,8 +431,7 @@ struct shuffle_index_transpose
{
constexpr inline size_t operator()(size_t index) const
{
- constexpr size_t side2 = size / side1;
- return index % side2 * side1 + index / side2;
+ return index % (size / side1) * side1 + index / (size / side1);
}
};
}
diff --git a/include/kfr/base/simd.hpp b/include/kfr/base/simd.hpp
@@ -28,6 +28,9 @@
#include "kfr.h"
#include "types.hpp"
+CMT_PRAGMA_MSVC(warning(push))
+CMT_PRAGMA_MSVC(warning(disable : 4324))
+
namespace kfr
{
@@ -72,10 +75,11 @@ CMT_INLINE simd<T, N> simd_read(const T* src)
template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(!is_poweroftwo(N)), typename = void>
CMT_INLINE simd<T, N> simd_read(const T* src)
{
- constexpr size_t first = prev_poweroftwo(N);
- constexpr size_t rest = N - first;
- constexpr auto extend_indices = cconcat(csizeseq<rest>, csizeseq<first - rest, index_undefined, 0>);
- constexpr auto concat_indices = csizeseq<N>;
+ constexpr size_t first = prev_poweroftwo(N);
+ constexpr size_t rest = N - first;
+ constexpr auto extend_indices =
+ cconcat(csizeseq_t<rest>(), csizeseq_t<first - rest, index_undefined, 0>());
+ constexpr auto concat_indices = cvalseq_t<size_t, N>();
return simd_shuffle<T, first>(simd_read<first, A>(src),
simd_shuffle<T, rest>(simd_read<rest, false>(src + first), extend_indices),
concat_indices);
@@ -92,10 +96,11 @@ CMT_INLINE void simd_write(T* dest, const simd<T, N>& value)
{
constexpr size_t first = prev_poweroftwo(N);
constexpr size_t rest = N - first;
- simd_write<A, first>(dest, simd_shuffle(value, csizeseq<first>));
- simd_write<false, rest>(dest + first, simd_shuffle(value, csizeseq<rest, first>));
+ simd_write<A, first>(dest, simd_shuffle(value, csizeseq_t<first>()));
+ simd_write<false, rest>(dest + first, simd_shuffle(value, csizeseq_t<rest, first>()));
}
+#define KFR_SIMD_SET(T, ...) (T{ __VA_ARGS__ })
#define KFR_SIMD_CAST(T, N, X) __builtin_convertvector(X, ::kfr::simd<T, N>)
#define KFR_SIMD_BITCAST(T, N, X) ((::kfr::simd<T, N>)(X))
#define KFR_SIMD_BROADCAST(T, N, X) ((::kfr::simd<T, N>)(X))
@@ -107,6 +112,9 @@ namespace internal
{
template <typename T>
+constexpr inline T maskbits(bool value);
+
+template <typename T>
struct simd_float_ops
{
constexpr static T neg(T x) { return -x; }
@@ -144,74 +152,74 @@ struct simd_int_ops : simd_float_ops<T>
}
template <typename T, size_t N>
-struct alignas(next_poweroftwo(N * sizeof(T))) simd
+struct alignas(const_min(size_t(64), next_poweroftwo(N * sizeof(T)))) simd
{
using ops =
conditional<std::is_floating_point<T>::value, internal::simd_float_ops<T>, internal::simd_int_ops<T>>;
- constexpr static simd broadcast(T value) { return broadcast_impl(value, csizeseq<N>); }
+ constexpr static simd broadcast(T value) { return broadcast_impl(value, cvalseq_t<size_t, N>()); }
constexpr friend simd operator+(const simd& x) { return x; }
- constexpr friend simd operator-(const simd& x) { return op_impl<ops::neg>(x, csizeseq<N>); }
- constexpr friend simd operator~(const simd& x) { return op_impl<ops::bnot>(x, csizeseq<N>); }
+ constexpr friend simd operator-(const simd& x) { return op_impl<ops::neg>(x, cvalseq_t<size_t, N>()); }
+ constexpr friend simd operator~(const simd& x) { return op_impl<ops::bnot>(x, cvalseq_t<size_t, N>()); }
constexpr friend simd operator+(const simd& x, const simd& y)
{
- return op_impl<ops::add>(x, y, csizeseq<N>);
+ return op_impl<ops::add>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator-(const simd& x, const simd& y)
{
- return op_impl<ops::sub>(x, y, csizeseq<N>);
+ return op_impl<ops::sub>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator*(const simd& x, const simd& y)
{
- return op_impl<ops::mul>(x, y, csizeseq<N>);
+ return op_impl<ops::mul>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator/(const simd& x, const simd& y)
{
- return op_impl<ops::div>(x, y, csizeseq<N>);
+ return op_impl<ops::div>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator&(const simd& x, const simd& y)
{
- return op_impl<ops::band>(x, y, csizeseq<N>);
+ return op_impl<ops::band>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator|(const simd& x, const simd& y)
{
- return op_impl<ops::bor>(x, y, csizeseq<N>);
+ return op_impl<ops::bor>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator^(const simd& x, const simd& y)
{
- return op_impl<ops::bxor>(x, y, csizeseq<N>);
+ return op_impl<ops::bxor>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator<<(const simd& x, const simd& y)
{
- return op_impl<ops::shl>(x, y, csizeseq<N>);
+ return op_impl<ops::shl>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator>>(const simd& x, const simd& y)
{
- return op_impl<ops::shr>(x, y, csizeseq<N>);
+ return op_impl<ops::shr>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator==(const simd& x, const simd& y)
{
- return op_impl<ops::eq>(x, y, csizeseq<N>);
+ return op_impl<ops::eq>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator!=(const simd& x, const simd& y)
{
- return op_impl<ops::ne>(x, y, csizeseq<N>);
+ return op_impl<ops::ne>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator<(const simd& x, const simd& y)
{
- return op_impl<ops::lt>(x, y, csizeseq<N>);
+ return op_impl<ops::lt>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator>(const simd& x, const simd& y)
{
- return op_impl<ops::gt>(x, y, csizeseq<N>);
+ return op_impl<ops::gt>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator<=(const simd& x, const simd& y)
{
- return op_impl<ops::le>(x, y, csizeseq<N>);
+ return op_impl<ops::le>(x, y, cvalseq_t<size_t, N>());
}
constexpr friend simd operator>=(const simd& x, const simd& y)
{
- return op_impl<ops::ge>(x, y, csizeseq<N>);
+ return op_impl<ops::ge>(x, y, cvalseq_t<size_t, N>());
}
constexpr T operator[](size_t index) const { return items[index]; }
T& operator[](size_t index) { return items[index]; }
@@ -220,29 +228,29 @@ struct alignas(next_poweroftwo(N * sizeof(T))) simd
template <typename U>
constexpr simd<U, N> cast() const
{
- return cast_impl<U>(*this, csizeseq<N>);
+ return cast_impl<U>(*this, cvalseq_t<size_t, N>());
}
private:
template <typename U, size_t... indices>
constexpr static simd<U, N> cast_impl(const simd& x, csizes_t<indices...>)
{
- return simd<U, N>{ static_cast<U>(x.items[indices])... };
+ return simd<U, N>{ { static_cast<U>(x.items[indices])... } };
}
template <T (*fn)(T), size_t... indices>
constexpr static simd op_impl(const simd& x, csizes_t<indices...>)
{
- return simd{ fn(x.items[indices])... };
+ return simd{ { fn(x.items[indices])... } };
}
template <T (*fn)(T, T), size_t... indices>
constexpr static simd op_impl(const simd& x, const simd& y, csizes_t<indices...>)
{
- return simd{ fn(x.items[indices], y.items[indices])... };
+ return simd{ { fn(x.items[indices], y.items[indices])... } };
}
template <size_t... indices>
constexpr static simd broadcast_impl(T value, csizes_t<indices...>)
{
- return simd{ ((void)indices, value)... };
+ return simd{ { ((void)indices, value)... } };
}
};
@@ -256,12 +264,12 @@ template <typename T, size_t N, int... indices>
constexpr CMT_INLINE simd<T, sizeof...(indices)> simd_shuffle(const simd<T, N>& x, const simd<T, N>& y,
cints_t<indices...>) noexcept
{
- return simd<T, sizeof...(indices)>{ (indices == -1 ? T()
- : ((indices >= N) ? y[indices - N] : x[indices]))... };
+ return simd<T, sizeof...(indices)>{ { (
+ indices == -1 ? T() : ((indices >= N) ? y[indices - N] : x[indices]))... } };
}
template <typename To, typename From, size_t N, size_t Nout = N * sizeof(From) / sizeof(To)>
-constexpr CMT_INLINE simd<To, Nout> simd_bitcast(const simd<From, N>& value) noexcept
+CMT_INLINE simd<To, Nout> simd_bitcast(const simd<From, N>& value) noexcept
{
union {
const simd<From, N> from;
@@ -297,19 +305,22 @@ CMT_INLINE void simd_write_impl(T* dest, const simd<T, N>& value, ctrue_t)
template <size_t N, bool A = false, typename T>
CMT_INLINE simd<T, N> simd_read(const T* src)
{
- return simd_read_impl<N>(src, cbool<A>);
+ return simd_read_impl<N>(src, cbool_t<A>());
}
template <bool A = false, size_t N, typename T>
CMT_INLINE void simd_write(T* dest, const simd<T, N>& value)
{
- return simd_write_impl<N>(dest, value, cbool<A>);
+ return simd_write_impl<N>(dest, value, cbool_t<A>());
}
-#define KFR_SIMD_CAST(T, N, X) (::kfr::simd_cast<T>(X))
-#define KFR_SIMD_BITCAST(T, N, X) (::kfr::simd_bitcast<T>(X))
+#define KFR_SIMD_SET(T, ...) (T{ { __VA_ARGS__ } })
+#define KFR_SIMD_CAST(T, N, X) ((void)N, ::kfr::simd_cast<T>(X))
+#define KFR_SIMD_BITCAST(T, N, X) ((void)N, ::kfr::simd_bitcast<T>(X))
#define KFR_SIMD_BROADCAST(T, N, X) (::kfr::simd<T, N>::broadcast(X))
-#define KFR_SIMD_SHUFFLE(X, Y, ...) simd_shuffle(X, Y, cints<__VA_ARGS__>)
+#define KFR_SIMD_SHUFFLE(X, Y, ...) simd_shuffle(X, Y, cints_t<__VA_ARGS__>())
#endif
}
+
+CMT_PRAGMA_MSVC(warning(pop))
diff --git a/include/kfr/base/sin_cos.hpp b/include/kfr/base/sin_cos.hpp
@@ -35,7 +35,7 @@
#include "shuffle.hpp"
#if CMT_HAS_WARNING("-Wc99-extensions")
-#pragma clang diagnostic ignored "-Wc99-extensions"
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wc99-extensions")
#endif
namespace kfr
@@ -44,16 +44,6 @@ namespace kfr
namespace intrinsics
{
-template <typename T>
-constexpr static T fold_constant_div = choose_const<T>(0x1.921fb6p-1f, 0x1.921fb54442d18p-1);
-
-template <typename T>
-constexpr static T fold_constant_hi = choose_const<T>(0x1.922000p-1f, 0x1.921fb40000000p-1);
-template <typename T>
-constexpr static T fold_constant_rem1 = choose_const<T>(-0x1.2ae000p-19f, 0x1.4442d00000000p-25);
-template <typename T>
-constexpr static T fold_constant_rem2 = choose_const<T>(-0x1.de973ep-32f, 0x1.8469898cc5170p-49);
-
template <typename T, size_t N>
KFR_SINTRIN vec<T, N> trig_horner(const vec<T, N>&, const mask<T, N>& msk, const T& a0, const T& b0)
{
@@ -70,9 +60,9 @@ KFR_SINTRIN vec<T, N> trig_horner(const vec<T, N>& x, const mask<T, N>& msk, con
template <typename T, size_t N, typename Tprecise = f64>
KFR_SINTRIN vec<T, N> trig_fold(const vec<T, N>& x, vec<itype<T>, N>& quadrant)
{
- const vec<T, N> xabs = abs(x);
- constexpr vec<T, N> div = fold_constant_div<T>;
- vec<T, N> y = floor(xabs / div);
+ const vec<T, N> xabs = abs(x);
+ constexpr T div = constants<T>::fold_constant_div;
+ vec<T, N> y = floor(xabs / div);
quadrant = cast<itype<T>>(y - floor(y * T(1.0 / 16.0)) * T(16.0));
const mask<T, N> msk = bitcast<T>((quadrant & 1) != 0);
@@ -80,25 +70,25 @@ KFR_SINTRIN vec<T, N> trig_fold(const vec<T, N>& x, vec<itype<T>, N>& quadrant)
y = select(msk, y + T(1.0), y);
quadrant = quadrant & 7;
- constexpr vec<Tprecise, N> hi = cast<Tprecise>(fold_constant_hi<T>);
- constexpr vec<T, N> rem1 = fold_constant_rem1<T>;
- constexpr vec<T, N> rem2 = fold_constant_rem2<T>;
+ constexpr Tprecise hi = cast<Tprecise>(constants<T>::fold_constant_hi);
+ constexpr T rem1 = constants<T>::fold_constant_rem1;
+ constexpr T rem2 = constants<T>::fold_constant_rem2;
return cast<T>(cast<Tprecise>(xabs) - cast<Tprecise>(y) * hi) - y * rem1 - y * rem2;
}
template <size_t N>
KFR_SINTRIN vec<f32, N> trig_sincos(const vec<f32, N>& folded, const mask<f32, N>& cosmask)
{
- constexpr f32 sin_c2 = -0x2.aaaaacp-4f;
- constexpr f32 sin_c4 = 0x2.222334p-8f;
- constexpr f32 sin_c6 = -0xd.0566ep-16f;
- constexpr f32 sin_c8 = 0x3.64cc1cp-20f;
- constexpr f32 sin_c10 = -0x5.6c4a4p-24f;
- constexpr f32 cos_c2 = -0x8.p-4f;
- constexpr f32 cos_c4 = 0xa.aaaabp-8f;
- constexpr f32 cos_c6 = -0x5.b05d48p-12f;
- constexpr f32 cos_c8 = 0x1.a065f8p-16f;
- constexpr f32 cos_c10 = -0x4.cd156p-24f;
+ constexpr f32 sin_c2 = CMT_FP(-0x2.aaaaacp-4f, -1.6666667163e-01f);
+ constexpr f32 sin_c4 = CMT_FP(0x2.222334p-8f, 8.3333970979e-03f);
+ constexpr f32 sin_c6 = CMT_FP(-0xd.0566ep-16f, -1.9868623349e-04f);
+ constexpr f32 sin_c8 = CMT_FP(0x3.64cc1cp-20f, 3.2365221614e-06f);
+ constexpr f32 sin_c10 = CMT_FP(-0x5.6c4a4p-24f, -3.2323646337e-07f);
+ constexpr f32 cos_c2 = CMT_FP(-0x8.p-4f, -5.0000000000e-01f);
+ constexpr f32 cos_c4 = CMT_FP(0xa.aaaabp-8f, 4.1666667908e-02f);
+ constexpr f32 cos_c6 = CMT_FP(-0x5.b05d48p-12f, -1.3888973044e-03f);
+ constexpr f32 cos_c8 = CMT_FP(0x1.a065f8p-16f, 2.4819273676e-05f);
+ constexpr f32 cos_c10 = CMT_FP(-0x4.cd156p-24f, -2.8616830150e-07f);
const vec<f32, N> x2 = folded * folded;
@@ -112,22 +102,22 @@ KFR_SINTRIN vec<f32, N> trig_sincos(const vec<f32, N>& folded, const mask<f32, N
template <size_t N>
KFR_SINTRIN vec<f64, N> trig_sincos(const vec<f64, N>& folded, const mask<f64, N>& cosmask)
{
- constexpr f64 sin_c2 = -0x2.aaaaaaaaaaaaap-4;
- constexpr f64 sin_c4 = 0x2.22222222220cep-8;
- constexpr f64 sin_c6 = -0xd.00d00cffd6618p-16;
- constexpr f64 sin_c8 = 0x2.e3bc744fb879ep-20;
- constexpr f64 sin_c10 = -0x6.b99034c1467a4p-28;
- constexpr f64 sin_c12 = 0xb.0711ea8fe8ee8p-36;
- constexpr f64 sin_c14 = -0xb.7e010897e55dp-44;
- constexpr f64 sin_c16 = -0xb.64eac07f1d6bp-48;
- constexpr f64 cos_c2 = -0x8.p-4;
- constexpr f64 cos_c4 = 0xa.aaaaaaaaaaaa8p-8;
- constexpr f64 cos_c6 = -0x5.b05b05b05ad28p-12;
- constexpr f64 cos_c8 = 0x1.a01a01a0022e6p-16;
- constexpr f64 cos_c10 = -0x4.9f93ed845de2cp-24;
- constexpr f64 cos_c12 = 0x8.f76bc015abe48p-32;
- constexpr f64 cos_c14 = -0xc.9bf2dbe00379p-40;
- constexpr f64 cos_c16 = 0xd.1232ac32f7258p-48;
+ constexpr f64 sin_c2 = CMT_FP(-0x2.aaaaaaaaaaaaap-4, -1.666666666666666574e-01);
+ constexpr f64 sin_c4 = CMT_FP(0x2.22222222220cep-8, 8.333333333333038315e-03);
+ constexpr f64 sin_c6 = CMT_FP(-0xd.00d00cffd6618p-16, -1.984126984092335463e-04);
+ constexpr f64 sin_c8 = CMT_FP(0x2.e3bc744fb879ep-20, 2.755731902164406591e-06);
+ constexpr f64 sin_c10 = CMT_FP(-0x6.b99034c1467a4p-28, -2.505204327429436704e-08);
+ constexpr f64 sin_c12 = CMT_FP(0xb.0711ea8fe8ee8p-36, 1.604729496525771112e-10);
+ constexpr f64 sin_c14 = CMT_FP(-0xb.7e010897e55dp-44, -6.532561241665605726e-13);
+ constexpr f64 sin_c16 = CMT_FP(-0xb.64eac07f1d6bp-48, -4.048035517573349688e-14);
+ constexpr f64 cos_c2 = CMT_FP(-0x8.p-4, -5.000000000000000000e-01);
+ constexpr f64 cos_c4 = CMT_FP(0xa.aaaaaaaaaaaa8p-8, 4.166666666666666435e-02);
+ constexpr f64 cos_c6 = CMT_FP(-0x5.b05b05b05ad28p-12, -1.388888888888844490e-03);
+ constexpr f64 cos_c8 = CMT_FP(0x1.a01a01a0022e6p-16, 2.480158730125666056e-05);
+ constexpr f64 cos_c10 = CMT_FP(-0x4.9f93ed845de2cp-24, -2.755731909937878141e-07);
+ constexpr f64 cos_c12 = CMT_FP(0x8.f76bc015abe48p-32, 2.087673146642573010e-09);
+ constexpr f64 cos_c14 = CMT_FP(-0xc.9bf2dbe00379p-40, -1.146797738558921387e-11);
+ constexpr f64 cos_c16 = CMT_FP(0xd.1232ac32f7258p-48, 4.643782497495272199e-14);
vec<f64, N> x2 = folded * folded;
vec<f64, N> formula =
@@ -193,7 +183,7 @@ KFR_SINTRIN vec<T, N> cos(const vec<T, N>& x)
template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
KFR_SINTRIN vec<T, N> fastsin(const vec<T, N>& x)
{
- constexpr vec<T, N> msk = broadcast<N>(internal::highbitmask<T>);
+ CMT_GNU_CONSTEXPR vec<T, N> msk = broadcast<N>(constants<T>::highbitmask());
constexpr static T c2 = -0.16665853559970855712890625;
constexpr static T c4 = +8.31427983939647674560546875e-3;
@@ -238,7 +228,7 @@ KFR_SINTRIN vec<T, N> cossin(const vec<T, N>& x)
template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
KFR_SINTRIN vec<T, N> sinc(const vec<T, N>& x)
{
- return select(abs(x) <= c_epsilon<T>, T(1), sin(x) / x);
+ return select(abs(x) <= constants<T>::epsilon, T(1), sin(x) / x);
}
template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = flt_type<T>>
@@ -294,37 +284,37 @@ KFR_I_FLT_CONVERTER(sinc)
template <typename T, typename Tout = flt_type<T>>
KFR_SINTRIN Tout sindeg(const T& x)
{
- return sin(x * c_degtorad<Tout>);
+ return sin(x * constants<Tout>::degtorad);
}
template <typename T, typename Tout = flt_type<T>>
KFR_SINTRIN Tout cosdeg(const T& x)
{
- return cos(x * c_degtorad<Tout>);
+ return cos(x * constants<Tout>::degtorad);
}
template <typename T, typename Tout = flt_type<T>>
KFR_SINTRIN Tout fastsindeg(const T& x)
{
- return fastsin(x * c_degtorad<Tout>);
+ return fastsin(x * constants<Tout>::degtorad);
}
template <typename T, typename Tout = flt_type<T>>
KFR_SINTRIN Tout fastcosdeg(const T& x)
{
- return fastcos(x * c_degtorad<Tout>);
+ return fastcos(x * constants<Tout>::degtorad);
}
template <typename T, typename Tout = flt_type<T>>
KFR_SINTRIN Tout sincosdeg(const T& x)
{
- return sincos(x * c_degtorad<Tout>);
+ return sincos(x * constants<Tout>::degtorad);
}
template <typename T, typename Tout = flt_type<T>>
KFR_SINTRIN Tout cossindeg(const T& x)
{
- return cossin(x * c_degtorad<Tout>);
+ return cossin(x * constants<Tout>::degtorad);
}
}
diff --git a/include/kfr/base/sort.hpp b/include/kfr/base/sort.hpp
@@ -45,7 +45,7 @@ CMT_INLINE vec<T, N> sort(const vec<T, N>& x)
constexpr size_t Nhalf = N / 2;
vec<T, Nhalf> e = low(x);
vec<T, Nhalf> o = high(x);
- constexpr auto blend0 = cconcat(csizes<1>, csizeseq<Nhalf - 1, 0, 0>);
+ constexpr auto blend0 = cconcat(csizes_t<1>(), csizeseq_t<Nhalf - 1, 0, 0>());
for (size_t i = 0; i < Nhalf; i++)
{
vec<T, Nhalf> t;
@@ -78,7 +78,7 @@ CMT_INLINE vec<T, N> sortdesc(const vec<T, N>& x)
constexpr size_t Nhalf = N / 2;
vec<T, Nhalf> e = low(x);
vec<T, Nhalf> o = high(x);
- constexpr auto blend0 = cconcat(csizes<1>, csizeseq<Nhalf - 1, 0, 0>);
+ constexpr auto blend0 = cconcat(csizes_t<1>(), csizeseq_t<Nhalf - 1, 0, 0>());
for (size_t i = 0; i < Nhalf; i++)
{
vec<T, Nhalf> t;
diff --git a/include/kfr/base/tan.hpp b/include/kfr/base/tan.hpp
@@ -64,19 +64,19 @@ KFR_SINTRIN vec<f32, N> tan(const vec<f32, N>& x_full)
mask<f32, N> inverse;
const vec<f32, N> x = trig_fold_simple(x_full, inverse);
- constexpr f32 tan_c2 = 0x5.555378p-4;
- constexpr f32 tan_c4 = 0x2.225bb8p-4;
- constexpr f32 tan_c6 = 0xd.ac3fep-8;
- constexpr f32 tan_c8 = 0x6.41644p-8;
- constexpr f32 tan_c10 = 0xc.bfe7ep-12;
- constexpr f32 tan_c12 = 0x2.6754dp-8;
-
- constexpr f32 cot_c2 = -0x5.555558p-4;
- constexpr f32 cot_c4 = -0x5.b0581p-8;
- constexpr f32 cot_c6 = -0x8.ac5ccp-12;
- constexpr f32 cot_c8 = -0xd.aaa01p-16;
- constexpr f32 cot_c10 = -0x1.a9a9b4p-16;
- constexpr f32 cot_c12 = -0x6.f7d4dp-24;
+ constexpr f32 tan_c2 = CMT_FP(0x5.555378p-4, 3.333315551280975342e-01);
+ constexpr f32 tan_c4 = CMT_FP(0x2.225bb8p-4, 1.333882510662078857e-01);
+ constexpr f32 tan_c6 = CMT_FP(0xd.ac3fep-8, 5.340956896543502808e-02);
+ constexpr f32 tan_c8 = CMT_FP(0x6.41644p-8, 2.443529665470123291e-02);
+ constexpr f32 tan_c10 = CMT_FP(0xc.bfe7ep-12, 3.112703096121549606e-03);
+ constexpr f32 tan_c12 = CMT_FP(0x2.6754dp-8, 9.389210492372512817e-03);
+
+ constexpr f32 cot_c2 = CMT_FP(-0x5.555558p-4, -3.333333432674407959e-01);
+ constexpr f32 cot_c4 = CMT_FP(-0x5.b0581p-8, -2.222204580903053284e-02);
+ constexpr f32 cot_c6 = CMT_FP(-0x8.ac5ccp-12, -2.117502503097057343e-03);
+ constexpr f32 cot_c8 = CMT_FP(-0xd.aaa01p-16, -2.085343148792162538e-04);
+ constexpr f32 cot_c10 = CMT_FP(-0x1.a9a9b4p-16, -2.537148611736483872e-05);
+ constexpr f32 cot_c12 = CMT_FP(-0x6.f7d4dp-24, -4.153305894760705996e-07);
const vec<f32, N> x2 = x * x;
const vec<f32, N> val = trig_horner(x2, inverse, 1.0f, 1.0f, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6,
@@ -92,27 +92,27 @@ KFR_SINTRIN vec<f64, N> tan(const vec<f64, N>& x_full)
mask<f64, N> inverse;
const vec<f64, N> x = trig_fold_simple(x_full, inverse);
- constexpr f64 tan_c2 = 0x5.5555554d8e5b8p-4;
- constexpr f64 tan_c4 = 0x2.222224820264p-4;
- constexpr f64 tan_c6 = 0xd.d0d90de32b3e8p-8;
- constexpr f64 tan_c8 = 0x5.99723bdcf5cacp-8;
- constexpr f64 tan_c10 = 0x2.434a142e413ap-8;
- constexpr f64 tan_c12 = 0xf.2b59061305efp-12;
- constexpr f64 tan_c14 = 0x4.a12565071a664p-12;
- constexpr f64 tan_c16 = 0x4.dada3797ac1bcp-12;
- constexpr f64 tan_c18 = -0x1.a74976b6ea3f3p-12;
- constexpr f64 tan_c20 = 0x1.d06a5ae5e4a74p-12;
-
- constexpr f64 cot_c2 = -0x5.5555555555554p-4;
- constexpr f64 cot_c4 = -0x5.b05b05b05b758p-8;
- constexpr f64 cot_c6 = -0x8.ab355dffc79a8p-12;
- constexpr f64 cot_c8 = -0xd.debbca405c9f8p-16;
- constexpr f64 cot_c10 = -0x1.66a8edb99b15p-16;
- constexpr f64 cot_c12 = -0x2.450239be0ee92p-20;
- constexpr f64 cot_c14 = -0x3.ad6ddb4719438p-24;
- constexpr f64 cot_c16 = -0x5.ff4c42741356p-28;
- constexpr f64 cot_c18 = -0x9.06881bcdf3108p-32;
- constexpr f64 cot_c20 = -0x1.644abedc113cap-32;
+ constexpr f64 tan_c2 = CMT_FP(0x5.5555554d8e5b8p-4, 3.333333332201594557e-01);
+ constexpr f64 tan_c4 = CMT_FP(0x2.222224820264p-4, 1.333333421790934281e-01);
+ constexpr f64 tan_c6 = CMT_FP(0xd.d0d90de32b3e8p-8, 5.396801556632355862e-02);
+ constexpr f64 tan_c8 = CMT_FP(0x5.99723bdcf5cacp-8, 2.187265359403693307e-02);
+ constexpr f64 tan_c10 = CMT_FP(0x2.434a142e413ap-8, 8.839254309582239566e-03);
+ constexpr f64 tan_c12 = CMT_FP(0xf.2b59061305efp-12, 3.703449009834865711e-03);
+ constexpr f64 tan_c14 = CMT_FP(0x4.a12565071a664p-12, 1.130243370829653185e-03);
+ constexpr f64 tan_c16 = CMT_FP(0x4.dada3797ac1bcp-12, 1.185276423238536747e-03);
+ constexpr f64 tan_c18 = CMT_FP(-0x1.a74976b6ea3f3p-12, -4.036779095551438937e-04);
+ constexpr f64 tan_c20 = CMT_FP(0x1.d06a5ae5e4a74p-12, 4.429010863244216712e-04);
+
+ constexpr f64 cot_c2 = CMT_FP(-0x5.5555555555554p-4, -3.333333333333333148e-01);
+ constexpr f64 cot_c4 = CMT_FP(-0x5.b05b05b05b758p-8, -2.222222222222377391e-02);
+ constexpr f64 cot_c6 = CMT_FP(-0x8.ab355dffc79a8p-12, -2.116402116358796163e-03);
+ constexpr f64 cot_c8 = CMT_FP(-0xd.debbca405c9f8p-16, -2.116402122295888289e-04);
+ constexpr f64 cot_c10 = CMT_FP(-0x1.66a8edb99b15p-16, -2.137779458737224013e-05);
+ constexpr f64 cot_c12 = CMT_FP(-0x2.450239be0ee92p-20, -2.164426049513111728e-06);
+ constexpr f64 cot_c14 = CMT_FP(-0x3.ad6ddb4719438p-24, -2.191935496317727080e-07);
+ constexpr f64 cot_c16 = CMT_FP(-0x5.ff4c42741356p-28, -2.234152473099993830e-08);
+ constexpr f64 cot_c18 = CMT_FP(-0x9.06881bcdf3108p-32, -2.101416316020595077e-09);
+ constexpr f64 cot_c20 = CMT_FP(-0x1.644abedc113cap-32, -3.240456633529511097e-10);
const vec<f64, N> x2 = x * x;
const vec<f64, N> val = trig_horner(x2, inverse, 1.0, 1.0, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, tan_c6,
diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp
@@ -30,8 +30,8 @@
#include <cmath>
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
#include "../cometa.hpp"
@@ -210,11 +210,14 @@ using returns = cometa::fn_returns<T>;
}
template <typename T>
-using ftype = deep_rebind<T, float_type<typebits<deep_subtype<T>>::bits>>;
+using ftype =
+ typename compound_type_traits<T>::template deep_rebind<float_type<typebits<deep_subtype<T>>::bits>>;
template <typename T>
-using itype = deep_rebind<T, int_type<typebits<deep_subtype<T>>::bits>>;
+using itype =
+ typename compound_type_traits<T>::template deep_rebind<int_type<typebits<deep_subtype<T>>::bits>>;
template <typename T>
-using utype = deep_rebind<T, unsigned_type<typebits<deep_subtype<T>>::bits>>;
+using utype =
+ typename compound_type_traits<T>::template deep_rebind<unsigned_type<typebits<deep_subtype<T>>::bits>>;
template <typename T>
using fsubtype = ftype<subtype<T>>;
@@ -301,8 +304,8 @@ CMT_INLINE void zeroize(T1& value)
builtin_memset(static_cast<void*>(builtin_addressof(value)), 0, sizeof(T1));
}
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wattributes"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wattributes")
template <typename T, bool A>
struct struct_with_alignment
@@ -322,7 +325,7 @@ __attribute__((__packed__, __may_alias__)) //
#endif
;
-#pragma GCC diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
}
template <typename T>
@@ -330,45 +333,6 @@ struct initialvalue
{
};
-#if CMT_COMPILER_GNU
-constexpr double infinity = __builtin_inf();
-constexpr double qnan = __builtin_nan("");
-#else
-constexpr double infinity = HUGE_VAL;
-constexpr double qnan = NAN;
-#endif
-
-namespace internal
-{
-#if CMT_COMPILER_GNU
-constexpr f32 allones_f32 = -__builtin_nanf("0xFFFFFFFF");
-constexpr f64 allones_f64 = -__builtin_nan("0xFFFFFFFFFFFFFFFF");
-#else
-constexpr f32 allones_f32 = -NAN;
-constexpr f64 allones_f64 = -NAN;
-#endif
-
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub allones = choose_const<Tsub>(allones_f32, allones_f64, static_cast<Tsub>(-1));
-
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub allzeros = Tsub();
-
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub highbitmask = choose_const<Tsub>(-0.f, -0.0, 1ull << (typebits<T>::bits - 1));
-
-template <typename T, typename Tsub = subtype<T>>
-constexpr Tsub invhighbitmask = choose_const<Tsub>(__builtin_nanf("0xFFFFFFFF"),
- __builtin_nan("0xFFFFFFFFFFFFFFFF"),
- ~(1ull << (typebits<T>::bits - 1)));
-
-template <typename T>
-constexpr inline T maskbits(bool value)
-{
- return value ? internal::allones<T> : T();
-}
-}
-
namespace internal
{
template <size_t width, typename Fn>
@@ -376,7 +340,7 @@ CMT_INLINE void block_process_impl(size_t& i, size_t size, Fn&& fn)
{
CMT_LOOP_NOUNROLL
for (; i < size / width * width; i += width)
- fn(i, csize<width>);
+ fn(i, csize_t<width>());
}
}
@@ -388,4 +352,4 @@ CMT_INLINE void block_process(size_t size, csizes_t<widths...>, Fn&& fn)
}
}
-#pragma GCC diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp
@@ -32,6 +32,9 @@
#include "read_write.hpp"
#include "types.hpp"
+CMT_PRAGMA_MSVC(warning(push))
+CMT_PRAGMA_MSVC(warning(disable : 4324))
+
namespace kfr
{
@@ -136,7 +139,7 @@ struct univector_base : input_expression, output_expression
{
ringbuf_write(cursor, x.data(), N);
}
- void ringbuf_write(size_t& cursor, const T value)
+ void ringbuf_write(size_t& cursor, const T& value)
{
T* data = get_data();
data[cursor] = value;
@@ -205,8 +208,8 @@ private:
};
template <typename T, size_t Size>
-struct alignas(maximum_vector_alignment) univector : std::array<T, Size>,
- univector_base<T, univector<T, Size>>
+struct alignas(platform<>::maximum_vector_alignment) univector : std::array<T, Size>,
+ univector_base<T, univector<T, Size>>
{
using std::array<T, Size>::size;
using size_type = size_t;
@@ -216,7 +219,8 @@ struct alignas(maximum_vector_alignment) univector : std::array<T, Size>,
this->assign_expr(std::forward<Input>(input));
}
template <typename... Args>
- constexpr univector(T x, Args... args) noexcept : std::array<T, Size>{ { x, static_cast<T>(args)... } }
+ constexpr univector(const T& x, const Args&... args) noexcept
+ : std::array<T, Size>{ { x, static_cast<T>(args)... } }
{
}
@@ -344,3 +348,5 @@ CMT_INLINE univector<T> render(Expr&& expr, size_t size)
return result;
}
}
+
+CMT_PRAGMA_MSVC(warning(pop))
diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp
@@ -27,15 +27,15 @@
#include "kfr.h"
-#include "types.hpp"
-
+#include "constants.hpp"
#include "simd.hpp"
+#include "types.hpp"
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wfloat-equal"
-#pragma clang diagnostic ignored "-Wc++98-compat-local-type-template-args"
-#pragma clang diagnostic ignored "-Wshadow"
-#pragma clang diagnostic ignored "-Wpacked"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wfloat-equal")
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wc++98-compat-local-type-template-args")
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wpacked")
namespace kfr
{
@@ -139,6 +139,12 @@ struct mask;
namespace internal
{
+template <typename T>
+constexpr inline T maskbits(bool value)
+{
+ return value ? constants<T>::allones() : T();
+}
+
template <typename T, size_t N>
struct flt_type_impl<vec<T, N>>
{
@@ -175,38 +181,36 @@ constexpr CMT_INLINE vec<To, Nout> compcast(const vec<From, N>& value) noexcept
namespace internal
{
template <typename Fn, size_t index>
-constexpr enable_if<std::is_same<size_t, decltype(std::declval<Fn>().operator()(size_t()))>::value, size_t>
-get_vec_index()
+constexpr size_t get_vec_index() noexcept
{
+#ifdef CMT_COMPILER_GNU
constexpr Fn fn{};
- return fn(index);
+ return fn(csize_t<index>());
+#else
+ return Fn()(csize_t<index>());
+#endif
}
-template <typename Fn, size_t index>
-constexpr enable_if<
- std::is_same<size_t, decltype(std::declval<Fn>().template operator() < index > ())>::value, size_t>
-get_vec_index(int = 0)
+constexpr int vindex(size_t index) { return index == index_undefined ? -1 : static_cast<int>(index); }
+
+template <typename T, size_t N, size_t... Indices, typename = enable_if<!(is_compound<T>::value)>,
+ typename = void>
+CMT_INLINE vec<T, sizeof...(Indices)> shufflevector_i_t(csizes_t<Indices...>, const vec<T, N>& x,
+ const vec<T, N>& y)
{
- constexpr Fn fn{};
- return fn.template operator()<index>();
+ vec<T, sizeof...(Indices)> result = KFR_SIMD_SHUFFLE(*x, *y, vindex(Indices)...);
+ return result;
}
-template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(!is_compound<T>::value)>
-CMT_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, const vec<T, N>& x,
- const vec<T, N>& y)
+constexpr size_t vinflate_index(size_t counter, size_t groupsize, size_t index)
{
- vec<T, sizeof...(Indices)> result = KFR_SIMD_SHUFFLE(
- *x, *y, static_cast<intptr_t>(Indices == index_undefined ? -1 : static_cast<intptr_t>(Indices))...);
- return result;
+ return index == index_undefined ? index_undefined : (counter % groupsize + groupsize * index);
}
template <size_t counter, size_t groupsize, size_t... indices>
constexpr size_t inflate_get_index()
{
- constexpr csizes_t<indices...> ind{};
- return (ind.get(csize<counter / groupsize>) == index_undefined
- ? index_undefined
- : (counter % groupsize + groupsize * ind.get(csize<counter / groupsize>)));
+ return vinflate_index(counter, groupsize, csizes_t<indices...>().get(csize_t<counter / groupsize>()));
}
template <size_t... indices, size_t... counter, size_t groupsize = sizeof...(counter) / sizeof...(indices)>
@@ -215,29 +219,25 @@ constexpr auto inflate_impl(csizes_t<indices...> ind, csizes_t<counter...> cnt)
{
return {};
}
-}
-
-namespace internal
-{
template <size_t groupsize, size_t... indices>
constexpr auto inflate(csize_t<groupsize>, csizes_t<indices...>)
{
- return inflate_impl(csizes<indices...>, csizeseq<sizeof...(indices)*groupsize>);
+ return inflate_impl(csizes_t<indices...>(), csizeseq_t<sizeof...(indices)*groupsize>());
}
-template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(is_compound<T>::value)>
-CMT_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, const vec<T, N>& x,
- const vec<T, N>& y)
+template <typename T, size_t N, size_t... Indices, typename = enable_if<(is_compound<T>::value)>>
+CMT_INLINE vec<T, sizeof...(Indices)> shufflevector_i_t(csizes_t<Indices...> indices, const vec<T, N>& x,
+ const vec<T, N>& y)
{
- return compcast<T>(shufflevector(inflate(csize<widthof<T>()>, indices), compcast<subtype<T>>(x),
- compcast<subtype<T>>(y)));
+ return compcast<T>(shufflevector_i_t(inflate(csize_t<widthof<T>()>(), indices), compcast<subtype<T>>(x),
+ compcast<subtype<T>>(y)));
}
template <size_t... Indices, size_t Nout = sizeof...(Indices), typename T, size_t N>
-CMT_INLINE vec<T, Nout> shufflevector(csizes_t<Indices...>, const vec<T, N>& x)
+CMT_INLINE vec<T, Nout> shufflevector_i(csizes_t<Indices...>, const vec<T, N>& x)
{
- return internal::shufflevector<T, N>(csizes<Indices...>, x, x);
+ return internal::shufflevector_i_t<T, N>(csizes_t<Indices...>(), x, x);
}
template <typename Fn, size_t groupsize, typename T, size_t N, size_t... Indices,
@@ -245,23 +245,25 @@ template <typename Fn, size_t groupsize, typename T, size_t N, size_t... Indices
CMT_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y, cvals_t<size_t, Indices...>)
{
static_assert(N % groupsize == 0, "N % groupsize == 0");
- return internal::shufflevector<T, N>(
- csizes<(get_vec_index<Fn, Indices / groupsize>() * groupsize + Indices % groupsize)...>, x, y);
+ constexpr csizes_t<(get_vec_index<Fn, Indices / groupsize>() * groupsize + Indices % groupsize)...>
+ vindices{};
+ return internal::shufflevector_i_t<T, N>(vindices, x, y);
}
}
template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N>
CMT_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y)
{
- return internal::shufflevector<Fn, groupsize>(x, y, csizeseq<Nout>);
+ return internal::shufflevector<Fn, groupsize>(x, y, cvalseq_t<size_t, Nout>());
}
template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N>
CMT_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x)
{
- return internal::shufflevector<Fn, groupsize>(x, x, csizeseq<Nout>);
+ return internal::shufflevector<Fn, groupsize>(x, x, cvalseq_t<size_t, Nout>());
}
+#ifdef KFR_ENABLE_SWIZZLE
namespace swizzle
{
template <size_t>
@@ -300,9 +302,10 @@ constexpr swiz<13> s13{};
constexpr swiz<14> s14{};
constexpr swiz<15> s15{};
}
+#endif
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wold-style-cast"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wold-style-cast")
template <size_t N, typename T>
constexpr CMT_INLINE vec<T, N> broadcast(T x)
@@ -310,7 +313,7 @@ constexpr CMT_INLINE vec<T, N> broadcast(T x)
return x;
}
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
namespace internal
{
@@ -381,14 +384,15 @@ constexpr CMT_INLINE vec<Tsub, Nout> flatten(const vec<From, N>& value) noexcept
return *value;
}
-template <typename To, typename From, typename Tout = deep_rebind<From, To>>
+template <typename To, typename From,
+ typename Tout = typename compound_type_traits<From>::template deep_rebind<To>>
constexpr CMT_INLINE Tout cast(const From& value) noexcept
{
return static_cast<Tout>(value);
}
template <typename To, typename From>
-constexpr CMT_INLINE To bitcast(const From& value) noexcept
+CMT_GNU_CONSTEXPR CMT_INLINE To bitcast(const From& value) noexcept
{
static_assert(sizeof(From) == sizeof(To), "bitcast: Incompatible types");
union {
@@ -399,7 +403,7 @@ constexpr CMT_INLINE To bitcast(const From& value) noexcept
}
template <typename To, typename From, size_t N, size_t Nout = N* size_of<From>() / size_of<To>()>
-constexpr CMT_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept
+CMT_GNU_CONSTEXPR CMT_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept
{
using Tsub = typename vec<To, Nout>::scalar_type;
constexpr size_t width = vec<To, Nout>::scalar_size();
@@ -407,7 +411,7 @@ constexpr CMT_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept
}
template <typename To, typename From, size_t N, size_t Nout = N* size_of<From>() / size_of<To>()>
-constexpr CMT_INLINE mask<To, Nout> bitcast(const mask<From, N>& value) noexcept
+CMT_GNU_CONSTEXPR CMT_INLINE mask<To, Nout> bitcast(const mask<From, N>& value) noexcept
{
using Tsub = typename mask<To, Nout>::scalar_type;
constexpr size_t width = mask<To, Nout>::scalar_size();
@@ -456,7 +460,7 @@ constexpr CMT_INLINE vec<To, Nout> fbitcast(const vec<From, N>& value) noexcept
constexpr CMT_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); }
template <typename T, size_t N, size_t... Sizes>
-CMT_INLINE vec<T, N + csum(csizes<Sizes...>)> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest);
+CMT_INLINE vec<T, csum<size_t, N, Sizes...>()> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest);
namespace internal
{
@@ -567,7 +571,7 @@ CMT_GNU_CONSTEXPR CMT_INLINE vec<T, N> make_vector_impl(csizes_t<indices...>, co
constexpr size_t width = compound_type_traits<T>::width;
const T list[] = { args... };
using simd_t = typename vec<T, N>::simd_t;
- return simd_t{ compound_type_traits<T>::at(list[indices / width], indices % width)... };
+ return KFR_SIMD_SET(simd_t, compound_type_traits<T>::at(list[indices / width], indices % width)...);
}
}
@@ -599,7 +603,7 @@ template <typename Type = void, typename Arg, typename... Args, size_t N = (size
KFR_ENABLE_IF(is_number<subtype<SubType>>::value)>
constexpr CMT_INLINE vec<SubType, N> pack(const Arg& x, const Args&... rest)
{
- return internal::make_vector_impl<SubType>(csizeseq<N * widthof<SubType>()>, static_cast<SubType>(x),
+ return internal::make_vector_impl<SubType>(csizeseq_t<N * widthof<SubType>()>(), static_cast<SubType>(x),
static_cast<SubType>(rest)...);
}
KFR_FN(pack)
@@ -616,11 +620,19 @@ struct CMT_EMPTY_BASES vec : vec_t<T, N>, operators::empty
{
static_assert(N > 0 && N <= 256, "Invalid vector size");
+ static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value ||
+ std::is_same<T, signed char>::value || std::is_same<T, unsigned char>::value ||
+ std::is_same<T, short>::value || std::is_same<T, unsigned short>::value ||
+ std::is_same<T, int>::value || std::is_same<T, unsigned int>::value ||
+ std::is_same<T, long>::value || std::is_same<T, unsigned long>::value ||
+ std::is_same<T, long long>::value || std::is_same<T, unsigned long long>::value ||
+ !compound_type_traits<T>::is_scalar,
+ "Invalid vector type");
+
static_assert(!is_vec<T>::value || is_poweroftwo(size_of<T>()),
"Inner vector size must be a power of two");
constexpr static size_t scalar_size() noexcept { return N * compound_type_traits<T>::width; }
- using UT = utype<T>;
using value_type = T;
using scalar_type = subtype<T>;
using simd_t = simd<scalar_type, N * compound_type_traits<T>::width>;
@@ -630,34 +642,43 @@ struct CMT_EMPTY_BASES vec : vec_t<T, N>, operators::empty
constexpr static bool is_pod = true;
constexpr CMT_INLINE vec() noexcept {}
- constexpr CMT_INLINE vec(simd_t value) noexcept : v(value) {}
+ constexpr CMT_INLINE vec(const simd_t& value) noexcept : v(value) {}
+ constexpr CMT_INLINE vec(const vec&) noexcept = default;
+ constexpr CMT_INLINE vec(vec&&) noexcept = default;
+ CMT_INLINE vec& operator=(const vec&) noexcept = default;
+ CMT_INLINE vec& operator=(vec&&) noexcept = default;
+
template <typename U,
- KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width > 1)>
+ typename = enable_if<(std::is_convertible<U, T>::value && compound_type_traits<T>::width > 1)>>
constexpr CMT_INLINE vec(const U& value) noexcept
: v(*resize<scalar_size()>(bitcast<scalar_type>(make_vector(static_cast<T>(value)))))
{
}
template <typename U,
- KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width == 1)>
+ typename = enable_if<(std::is_convertible<U, T>::value && compound_type_traits<T>::width == 1)>,
+ typename = void>
constexpr CMT_INLINE vec(const U& value) noexcept : v(KFR_SIMD_BROADCAST(T, N, static_cast<T>(value)))
{
}
+
+ template <typename U,
+ typename = enable_if<std::is_convertible<U, T>::value && !std::is_same<vec<U, N>, T>::value>>
+ CMT_GNU_CONSTEXPR CMT_INLINE vec(const vec<U, N>& value) noexcept
+ : v(*internal::conversion<vec<T, N>, vec<U, N>>::cast(value))
+ {
+ }
template <typename... Ts>
- constexpr CMT_INLINE vec(const T& x, const T& y, const Ts&... rest) noexcept
- : v(*make_vector<T>(x, y, rest...))
+ CMT_GNU_CONSTEXPR CMT_INLINE vec(const T& x, const T& y, const Ts&... rest) noexcept
+ : v(*make_vector<T>(x, y, static_cast<T>(rest)...))
{
static_assert(N <= 2 + sizeof...(Ts), "Too few initializers for vec");
}
template <size_t N1, size_t N2, size_t... Ns>
- constexpr CMT_INLINE vec(const vec<T, N1>& v1, const vec<T, N2>& v2,
- const vec<T, Ns>&... vectors) noexcept : v(*concat(v1, v2, vectors...))
+ CMT_GNU_CONSTEXPR CMT_INLINE vec(const vec<T, N1>& v1, const vec<T, N2>& v2,
+ const vec<T, Ns>&... vectors) noexcept : v(*concat(v1, v2, vectors...))
{
- static_assert(csum(csizes<N1, N2, Ns...>) == N, "Can't concat vectors: invalid csizes");
+ static_assert(csum(csizes_t<N1, N2, Ns...>()) == N, "Can't concat vectors: invalid csizes");
}
- constexpr CMT_INLINE vec(const vec&) noexcept = default;
- constexpr CMT_INLINE vec(vec&&) noexcept = default;
- CMT_INLINE vec& operator=(const vec&) noexcept = default;
- CMT_INLINE vec& operator=(vec&&) noexcept = default;
friend CMT_INLINE vec operator-(const vec& x) { return vec_op<T, N>::neg(x.v); }
friend CMT_INLINE vec operator~(const vec& x) { return vec_op<T, N>::bnot(x.v); }
@@ -701,11 +722,11 @@ struct CMT_EMPTY_BASES vec : vec_t<T, N>, operators::empty
using array_t = T (&)[N];
CMT_INLINE array_t arr() { return ref_cast<array_t>(v); }
- template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value && !std::is_same<U, vec>::value)>
- CMT_INLINE constexpr operator vec<U, N>() const noexcept
+ /*template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value && !std::is_same<U, vec>::value)>
+ CMT_INLINE operator vec<U, N>() const noexcept
{
return internal::conversion<vec<U, N>, vec<T, N>>::cast(*this);
- }
+ }*/
private:
struct getter_setter;
@@ -815,11 +836,12 @@ struct mask : public vec<T, N>
using type = T;
constexpr static size_t width = N;
- using base = vec<T, N>;
+ using simd_t = typename vec<T, N>::simd_t;
+ using base = vec<T, N>;
constexpr CMT_INLINE mask() noexcept : base() {}
- constexpr CMT_INLINE mask(simd<T, N> value) noexcept : base(value) {}
+ constexpr CMT_INLINE mask(const simd_t& value) noexcept : base(value) {}
template <size_t N1, size_t... Ns>
constexpr CMT_INLINE mask(const mask<T, N1>& mask1, const mask<T, Ns>&... masks) noexcept
: base(*concat(mask1, masks...))
@@ -919,32 +941,34 @@ constexpr mask<T, Nout> partial_mask_helper(csizes_t<indices...>)
template <typename T, size_t Nout, size_t N1>
constexpr mask<T, Nout> partial_mask()
{
- return internal::partial_mask_helper<T, Nout, N1>(csizeseq<Nout>);
+ return internal::partial_mask_helper<T, Nout, N1>(csizeseq_t<Nout>());
}
template <typename T, size_t N>
-CMT_INLINE vec<T, N> concat(const vec<T, N>& x)
+CMT_INLINE vec<T, N> concat_impl(const vec<T, N>& x)
{
return x;
}
template <typename T, size_t N1, size_t N2>
-CMT_INLINE vec<T, N1 + N2> concat(const vec<T, N1>& x, const vec<T, N2>& y)
+CMT_INLINE vec<T, N1 + N2> concat_impl(const vec<T, N1>& x, const vec<T, N2>& y)
{
return concattwo<0, N1 + N2>(x, y);
}
template <typename T, size_t N1, size_t N2, size_t... Sizes>
-CMT_INLINE auto concat(const vec<T, N1>& x, const vec<T, N2>& y, const vec<T, Sizes>&... args)
+CMT_INLINE vec<T, csum<size_t, N1, N2, Sizes...>()> concat_impl(const vec<T, N1>& x, const vec<T, N2>& y,
+ const vec<T, Sizes>&... args)
{
- return concat(x, concat(y, args...));
+ return concat_impl(concat_impl(x, y), args...);
+ // return concat(x, concat(y, args...));
}
}
template <typename T, size_t N, size_t... Sizes>
-CMT_INLINE vec<T, N + csum(csizes<Sizes...>)> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest)
+CMT_INLINE vec<T, csum<size_t, N, Sizes...>()> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest)
{
- return internal::concat(x, rest...);
+ return internal::concat_impl(x, rest...);
}
KFR_FN(concat)
@@ -1239,13 +1263,14 @@ template <typename T, size_t N, typename Fn, typename... Args,
typename Tout = result_of<Fn(T, subtype<decay<Args>>...)>>
constexpr CMT_INLINE vec<Tout, N> apply(Fn&& fn, const vec<T, N>& arg, Args&&... args)
{
- return internal::apply_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>, arg, std::forward<Args>(args)...);
+ return internal::apply_helper<T, N>(std::forward<Fn>(fn), csizeseq_t<N>(), arg,
+ std::forward<Args>(args)...);
}
template <size_t N, typename Fn, typename T = result_of<Fn()>>
constexpr CMT_INLINE vec<T, N> apply(Fn&& fn)
{
- return internal::apply0_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>);
+ return internal::apply0_helper<T, N>(std::forward<Fn>(fn), csizeseq_t<N>());
}
template <typename T, int N>
@@ -1273,26 +1298,26 @@ constexpr CMT_INLINE mask<T, Nout> make_mask(bool arg, Args... args)
KFR_FN(make_mask)
template <typename T, size_t N>
-constexpr CMT_INLINE vec<T, N> zerovector()
+CMT_GNU_CONSTEXPR CMT_INLINE vec<T, N> zerovector()
{
constexpr size_t width = N * compound_type_traits<T>::width;
return compcast<T>(vec<subtype<T>, width>(simd<subtype<T>, width>()));
}
template <typename T, size_t N>
-constexpr CMT_INLINE vec<T, N> zerovector(vec_t<T, N>)
+CMT_GNU_CONSTEXPR CMT_INLINE vec<T, N> zerovector(vec_t<T, N>)
{
return zerovector<T, N>();
}
KFR_FN(zerovector)
template <typename T, size_t N>
-constexpr CMT_INLINE vec<T, N> allonesvector()
+CMT_GNU_CONSTEXPR CMT_INLINE vec<T, N> allonesvector()
{
return zerovector<T, N>() == zerovector<T, N>();
}
template <typename T, size_t N>
-constexpr CMT_INLINE vec<T, N> allonesvector(vec_t<T, N>)
+CMT_GNU_CONSTEXPR CMT_INLINE vec<T, N> allonesvector(vec_t<T, N>)
{
return allonesvector<T, N>();
}
@@ -1337,7 +1362,7 @@ KFR_FN(low)
KFR_FN(high)
}
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
namespace cometa
{
@@ -1355,7 +1380,7 @@ struct compound_type_traits<kfr::vec_t<T, N>>
template <typename U>
using rebind = kfr::vec_t<U, N>;
template <typename U>
- using deep_rebind = kfr::vec_t<cometa::deep_rebind<subtype, U>, N>;
+ using deep_rebind = kfr::vec_t<typename compound_type_traits<subtype>::template deep_rebind<U>, N>;
};
template <typename T, size_t N>
@@ -1370,7 +1395,7 @@ struct compound_type_traits<kfr::vec<T, N>>
template <typename U>
using rebind = kfr::vec<U, N>;
template <typename U>
- using deep_rebind = kfr::vec<cometa::deep_rebind<subtype, U>, N>;
+ using deep_rebind = kfr::vec<typename compound_type_traits<subtype>::template deep_rebind<U>, N>;
CMT_INLINE static constexpr subtype at(const kfr::vec<T, N>& value, size_t index) { return value[index]; }
};
@@ -1387,7 +1412,7 @@ struct compound_type_traits<kfr::mask<T, N>>
template <typename U>
using rebind = kfr::mask<U, N>;
template <typename U>
- using deep_rebind = kfr::mask<cometa::deep_rebind<subtype, U>, N>;
+ using deep_rebind = kfr::mask<typename compound_type_traits<subtype>::template deep_rebind<U>, N>;
CMT_INLINE static constexpr subtype at(const kfr::mask<T, N>& value, size_t index)
{
@@ -1395,6 +1420,7 @@ struct compound_type_traits<kfr::mask<T, N>>
}
};
}
+
namespace std
{
template <typename T1, typename T2, size_t N>
@@ -1412,6 +1438,16 @@ struct common_type<T1, kfr::vec<T2, N>>
{
using type = kfr::vec<typename common_type<T1, T2>::type, N>;
};
+template <typename T1, typename T2, size_t N1, size_t N2>
+struct common_type<kfr::vec<T1, N1>, kfr::vec<kfr::vec<T2, N1>, N2>>
+{
+ using type = kfr::vec<kfr::vec<typename common_type<T1, T2>::type, N1>, N2>;
+};
+template <typename T1, typename T2, size_t N1, size_t N2>
+struct common_type<kfr::vec<kfr::vec<T1, N1>, N2>, kfr::vec<T2, N1>>
+{
+ using type = kfr::vec<kfr::vec<typename common_type<T1, T2>::type, N1>, N2>;
+};
template <typename T1, typename T2, size_t N>
struct common_type<kfr::mask<T1, N>, kfr::mask<T2, N>>
diff --git a/include/kfr/cident.h b/include/kfr/cident.h
@@ -205,6 +205,11 @@ extern char* gets(char* __s);
#if defined(__GNUC__) || defined(__clang__) // GCC, Clang
#define CMT_COMPILER_GNU 1
+
+#if !defined(__clang__) // GCC only
+#define CMT_COMPILER_GCC 1
+#endif
+
#define CMT_GNU_ATTRIBUTES 1
#define CMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#if __cplusplus >= 201103L || defined __GXX_EXPERIMENTAL_CXX0X__
@@ -261,8 +266,9 @@ extern char* gets(char* __s);
#endif
-#if defined _MSC_VER && _MSC_VER >= 1900 && defined(__clang__) && \
- (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 9))
+#if defined _MSC_VER && _MSC_VER >= 1900 && \
+ (!defined(__clang__) || \
+ (defined(__clang__) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 9))))
#define CMT_EMPTY_BASES __declspec(empty_bases)
#else
#define CMT_EMPTY_BASES
@@ -389,10 +395,12 @@ extern char* gets(char* __s);
#define CMT_HAS_EXCEPTIONS 1
#endif
+#if defined __has_include
#if __has_include(<assert.h>)
#include <assert.h>
#define CMT_HAS_ASSERT_H 1
#endif
+#endif
#ifndef CMT_THROW
#if CMT_HAS_EXCEPTIONS
@@ -431,8 +439,46 @@ extern char* gets(char* __s);
#define CMT_FAST_CC __attribute__((fastcall))
#define CMT_UNUSED __attribute__((unused))
#define CMT_GNU_CONSTEXPR constexpr
+#define CMT_GNU_PACKED __attribute__((packed))
+#define CMT_PRAGMA_PACK_PUSH_1
+#define CMT_PRAGMA_PACK_POP
+#define CMT_FP(h, d) h
+#define CMT_PRAGMA_GNU(...) _Pragma(#__VA_ARGS__)
+#ifdef CMT_COMPILER_CLANG
+#define CMT_PRAGMA_CLANG(...) _Pragma(#__VA_ARGS__)
+#else
+#define CMT_PRAGMA_CLANG(...)
+#endif
+#ifdef CMT_COMPILER_GCC // GCC-only pragmas: emitted on GCC proper, expands to nothing on Clang
+#define CMT_PRAGMA_GCC(...) _Pragma(#__VA_ARGS__)
+#else
+#define CMT_PRAGMA_GCC(...)
+#endif
+#define CMT_PRAGMA_MSVC(...)
#else
#define CMT_FAST_CC __fastcall
#define CMT_UNUSED
#define CMT_GNU_CONSTEXPR
+#define CMT_GNU_PACKED
+#define CMT_PRAGMA_PACK_PUSH_1 __pragma(pack(push, 1))
+#define CMT_PRAGMA_PACK_POP __pragma(pack(pop))
+#define CMT_FP(h, d) d
+#define CMT_PRAGMA_GNU(...)
+#define CMT_PRAGMA_CLANG(...)
+#define CMT_PRAGMA_GCC(...)
+#define CMT_PRAGMA_MSVC(...) __pragma(__VA_ARGS__)
+#endif
+
+#if defined CMT_COMPILER_CLANG // Selects a string literal naming the detected compiler
+#if defined _MSC_VER
+#define CMT_COMPIER_NAME "clang-msvc"
+#else
+#define CMT_COMPIER_NAME "clang"
+#endif
+#elif defined CMT_COMPILER_GCC
+#define CMT_COMPIER_NAME "gcc"
+#elif defined CMT_COMPILER_MSVC
+#define CMT_COMPIER_NAME "msvc"
+#else
+#define CMT_COMPIER_NAME "unknown"
+#endif // NOTE(review): "COMPIER" is presumably a typo for "COMPILER"; rename only together with every user of the macro
diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp
@@ -10,8 +10,11 @@
#include <type_traits>
#include <utility>
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
+
+CMT_PRAGMA_MSVC(warning(push))
+CMT_PRAGMA_MSVC(warning(disable : 4814))
namespace cometa
{
@@ -228,11 +231,12 @@ using subtype = typename compound_type_traits<T>::subtype;
template <typename T>
using deep_subtype = typename compound_type_traits<T>::deep_subtype;
-template <typename T, typename SubType>
+/*template <typename T, typename SubType>
using rebind_subtype = typename compound_type_traits<T>::template rebind<SubType>;
template <typename T, typename SubType>
using deep_rebind = typename compound_type_traits<T>::template deep_rebind<SubType>;
+ */
template <typename T>
struct compound_type_traits<std::pair<T, T>>
@@ -247,7 +251,8 @@ struct compound_type_traits<std::pair<T, T>>
template <typename U>
using rebind = std::pair<U, U>;
template <typename U>
- using deep_rebind = std::pair<cometa::deep_rebind<subtype, U>, cometa::deep_rebind<subtype, U>>;
+ using deep_rebind = std::pair<typename compound_type_traits<subtype>::template deep_rebind<U>,
+ typename compound_type_traits<subtype>::template deep_rebind<U>>;
CMT_INLINE static constexpr const subtype& at(const std::pair<subtype, subtype>& value, size_t index)
{
@@ -346,27 +351,12 @@ using cuint_t = cval_t<unsigned, val>;
template <size_t val>
using csize_t = cval_t<size_t, val>;
-template <typename T, T val>
-constexpr cval_t<T, val> cval{};
-
-template <bool val>
-constexpr cbool_t<val> cbool{};
-
using cfalse_t = cbool_t<false>;
using ctrue_t = cbool_t<true>;
constexpr ctrue_t ctrue{};
constexpr cfalse_t cfalse{};
-template <int val>
-constexpr cint_t<val> cint{};
-
-template <unsigned val>
-constexpr cuint_t<val> cuint{};
-
-template <size_t val>
-constexpr csize_t<val> csize{};
-
namespace details
{
template <size_t index, typename T, T first, T... rest>
@@ -406,17 +396,17 @@ struct cvals_t : ops::empty
using type = cvals_t<T, values...>;
constexpr static size_t size() { return sizeof...(values); }
template <size_t index>
- constexpr T operator[](csize_t<index>)
+ constexpr T operator[](csize_t<index>) const
{
- return get(csize<index>);
+ return get(csize_t<index>());
}
template <size_t index>
constexpr static T get(csize_t<index> = csize_t<index>())
{
return details::get_nth<index, T, values...>::value;
}
- constexpr static T front() { return get(csize<0>); }
- constexpr static T back() { return get(csize<size() - 1>); }
+ constexpr static T front() { return get(csize_t<0>()); }
+ constexpr static T back() { return get(csize_t<size() - 1>()); }
static const T* begin() { return array(); }
static const T* end() { return array() + size(); }
@@ -444,6 +434,8 @@ struct cvals_t<T> : ops::empty
template <bool... values>
using cbools_t = cvals_t<bool, values...>;
+constexpr cbools_t<false, true> cfalse_true{};
+
template <int... values>
using cints_t = cvals_t<int, values...>;
@@ -459,39 +451,16 @@ using csizes_t = cvals_t<size_t, values...>;
template <size_t... values>
using elements_t = cvals_t<size_t, values...>;
-template <typename T, T... values>
-constexpr cvals_t<T, values...> cvals{};
-
-template <bool... vals>
-constexpr cbools_t<vals...> cbools{};
-
-constexpr cbools_t<false, true> cfalse_true{};
-
-template <int... vals>
-constexpr cints_t<vals...> cints{};
-
-template <char... vals>
-constexpr cchars_t<vals...> cchars{};
-
-template <unsigned... vals>
-constexpr cuints_t<vals...> cuints{};
-
-template <size_t... vals>
-constexpr csizes_t<vals...> csizes{};
-
-template <size_t... vals>
-constexpr elements_t<vals...> elements{};
-
template <typename T>
-constexpr inline T csum(cvals_t<T>)
+constexpr inline T csum(cvals_t<T> = cvals_t<T>())
{
return 0;
}
template <typename T, T first, T... rest>
-constexpr inline T csum(cvals_t<T, first, rest...>)
+constexpr inline T csum(cvals_t<T, first, rest...> = cvals_t<T, first, rest...>())
{
- return first + csum(cvals<T, rest...>);
+ return first + csum(cvals_t<T, rest...>());
}
template <typename T>
@@ -503,7 +472,7 @@ constexpr inline T cprod(cvals_t<T>)
template <typename T, T first, T... rest>
constexpr inline T cprod(cvals_t<T, first, rest...>)
{
- return first * cprod(cvals<T, rest...>);
+ return first * cprod(cvals_t<T, rest...>());
}
template <typename T>
@@ -515,9 +484,6 @@ struct ctype_t
template <typename T>
using type_of = typename T::type;
-template <typename T>
-constexpr ctype_t<T> ctype{};
-
template <typename... Types>
struct ctypes_t
{
@@ -533,8 +499,6 @@ struct ctypes_t
}
};
-template <typename... Ts>
-constexpr ctypes_t<Ts...> ctypes{};
namespace details
{
template <typename T1, typename T2>
@@ -711,36 +675,12 @@ struct cvalseq_impl<T, 1, Nstart, Nstep> : cvals_t<T, static_cast<T>(Nstart)>
template <typename T, size_t size, T start = T(), ptrdiff_t step = 1>
using cvalseq_t = typename details::cvalseq_impl<T, size, start, step>::type;
-template <typename T, T begin, T end>
-constexpr cvalseq_t<T, end - begin, begin> cvalrange{};
-
-template <size_t begin, size_t end>
-constexpr cvalseq_t<size_t, end - begin, begin> csizerange{};
-
-template <int begin, int end>
-constexpr cvalseq_t<int, end - begin, begin> cintrange{};
-
-template <unsigned begin, unsigned end>
-constexpr cvalseq_t<unsigned, end - begin, begin> cuintrange{};
-
-template <typename T, size_t size, T start = T(), ptrdiff_t step = 1>
-constexpr cvalseq_t<T, size, start, step> cvalseq{};
-
template <size_t size, size_t start = 0, ptrdiff_t step = 1>
-constexpr cvalseq_t<size_t, size, start, step> csizeseq{};
-
-template <size_t size, int start = 0, ptrdiff_t step = 1>
-constexpr cvalseq_t<int, size, start, step> cintseq{};
-
-template <size_t size, unsigned start = 0, ptrdiff_t step = 1>
-constexpr cvalseq_t<unsigned, size, start, step> cuintseq{};
+using csizeseq_t = cvalseq_t<size_t, size, start, step>;
template <typename... List>
using indicesfor_t = cvalseq_t<size_t, sizeof...(List), 0>;
-template <typename... List>
-constexpr indicesfor_t<List...> indicesfor{};
-
namespace details
{
@@ -775,21 +715,27 @@ struct is_enabled_impl<Fn, void_t<decltype(Fn::disabled)>> : std::integral_const
{
};
-template <size_t N>
+template <int N>
struct unique_enum_impl
{
- enum class type : size_t
+ enum type : int
{
value = N
};
};
-template <size_t N>
-using unique_enum = typename unique_enum_impl<N>::type;
+#ifdef CMT_COMPILER_MSVC
+#define CMT_ENABLE_IF_IMPL(N, ...) \
+ bool enable_ = (__VA_ARGS__), typename enabled_ = ::std::enable_if<enable_>::type, \
+ typename cometa::details::unique_enum_impl<N>::type dummy_ = \
+ ::cometa::details::unique_enum_impl<N>::value
+
+#else
#define CMT_ENABLE_IF_IMPL(N, ...) \
- typename ::std::enable_if<(__VA_ARGS__), ::cometa::details::unique_enum<N>>::type = \
- ::cometa::details::unique_enum<N>::value
+ typename ::std::enable_if<(__VA_ARGS__), typename ::cometa::details::unique_enum_impl<N>::type>::type = \
+ ::cometa::details::unique_enum_impl<N>::value
+#endif
#define CMT_ENABLE_IF(...) CMT_ENABLE_IF_IMPL(__LINE__, __VA_ARGS__)
}
@@ -1020,10 +966,16 @@ struct identity_impl
};
template <typename T>
-constexpr size_t elementsize = sizeof(T);
+constexpr size_t elementsize()
+{
+ return sizeof(T);
+}
template <>
-constexpr size_t elementsize<void> = 1;
+constexpr size_t elementsize<void>() // specialization: sizeof(void) is ill-formed, so void reports 1
+{
+ return 1;
+}
}
template <typename T>
@@ -1048,7 +1000,7 @@ struct carray<T, 1>
template <typename Fn, size_t index = 0, CMT_ENABLE_IF(is_callable<Fn, csize_t<index>>::value)>
CMT_INTRIN constexpr carray(Fn&& fn, csize_t<index> = csize_t<index>{}) noexcept
- : val(static_cast<T>(fn(csize<index>)))
+ : val(static_cast<T>(fn(csize_t<index>())))
{
}
@@ -1071,12 +1023,12 @@ struct carray<T, 1>
template <size_t index>
CMT_INTRIN constexpr T& get() noexcept
{
- return get(csize<index>);
+ return get(csize_t<index>());
}
template <size_t index>
CMT_INTRIN constexpr const T& get() const noexcept
{
- return get(csize<index>);
+ return get(csize_t<index>());
}
CMT_INTRIN constexpr const T* front() const noexcept { return val; }
CMT_INTRIN constexpr T* front() noexcept { return val; }
@@ -1103,8 +1055,8 @@ struct carray : carray<T, N - 1>
template <typename Fn, size_t index = N - 1>
CMT_INTRIN constexpr carray(Fn&& fn, csize_t<index> = csize_t<index>{}) noexcept
- : carray<T, N - 1>(std::forward<Fn>(fn), csize<index - 1>),
- val(static_cast<T>(fn(csize<index>)))
+ : carray<T, N - 1>(std::forward<Fn>(fn), csize_t<index - 1>()),
+ val(static_cast<T>(fn(csize_t<index>())))
{
}
@@ -1116,23 +1068,23 @@ struct carray : carray<T, N - 1>
template <size_t index>
CMT_INTRIN constexpr T& get(csize_t<index>) noexcept
{
- return carray<T, N - 1>::get(csize<index>);
+ return carray<T, N - 1>::get(csize_t<index>()); // delegates the lookup to the base carray<T, N - 1>
}
CMT_INTRIN constexpr const T& get(csize_t<N - 1>) const noexcept { return val; }
template <size_t index>
CMT_INTRIN constexpr const T& get(csize_t<index>) const noexcept
{
- return carray<T, N - 1>::get(csize<index>);
+ return carray<T, N - 1>::get(csize_t<index>()); // const variant: delegates to the base carray<T, N - 1>
}
template <size_t index>
CMT_INTRIN constexpr T& get() noexcept
{
- return get(csize<index>);
+ return get(csize_t<index>()); // index given as a template argument is turned into a tag argument
}
template <size_t index>
CMT_INTRIN constexpr const T& get() const noexcept
{
- return get(csize<index>);
+ return get(csize_t<index>()); // const variant of the template-argument form
}
CMT_INTRIN constexpr const T* front() const noexcept { return carray<T, N - 1>::front(); }
CMT_INTRIN constexpr T* front() noexcept { return carray<T, N - 1>::front(); }
@@ -1307,10 +1259,25 @@ using has_data_size = details::has_data_size_impl<decay<T>>;
template <typename T>
using value_type_of = typename decay<T>::value_type;
+#ifndef CMT_COMPILER_CLANG // helper is compiled only when CMT_COMPILER_CLANG is not defined
+namespace details
+{
+template <typename T, T value, typename Fn>
+void cforeach_impl(Fn&& fn) // calls fn exactly once, passing a cval_t<T, value> temporary
+{
+ fn(cval_t<T, value>());
+}
+}
+#endif
+
template <typename T, T... values, typename Fn>
CMT_INTRIN void cforeach(cvals_t<T, values...>, Fn&& fn)
{
- swallow{ (fn(cval<T, values>), void(), 0)... };
+#ifdef CMT_COMPILER_CLANG // Clang: fn is called directly inside the pack expansion
+ swallow{ (fn(cval_t<T, values>()), void(), 0)... };
+#else // other compilers: each call goes through details::cforeach_impl (presumably a compiler workaround - TODO confirm)
+ swallow{ (details::cforeach_impl<T, values>(std::forward<Fn>(fn)), void(), 0)... };
+#endif
}
template <typename T, typename Fn, CMT_ENABLE_IF(has_begin_end<T>::value)>
@@ -1335,9 +1302,9 @@ namespace details
{
template <size_t index, typename... types>
-CMT_INTRIN auto get_type_arg(ctypes_t<types...> type_list)
+CMT_INTRIN auto get_type_arg(ctypes_t<types...>) // parameter is left unnamed: only its type is used
{
- return ctype<type_of<details::get_nth_type<index, types...>>>;
+ return ctype_t<type_of<details::get_nth_type<index, types...>>>(); // ctype_t object for the type selected by get_nth_type<index, types...>
}
template <typename T0, typename... types, typename Fn, size_t... indices>
@@ -1350,7 +1317,7 @@ CMT_INTRIN void cforeach_types_impl(ctypes_t<T0, types...> type_list, Fn&& fn, c
template <typename... Ts, typename Fn>
CMT_INTRIN void cforeach(ctypes_t<Ts...> types, Fn&& fn)
{
- details::cforeach_types_impl(types, std::forward<Fn>(fn), csizeseq<sizeof...(Ts)>);
+ details::cforeach_types_impl(types, std::forward<Fn>(fn), csizeseq_t<sizeof...(Ts)>()); // passes a size sequence with one entry per type in Ts
}
template <typename A0, typename A1, typename Fn>
@@ -1371,26 +1338,26 @@ CMT_INTRIN void cforeach(A0&& a0, A1&& a1, A2&& a2, Fn&& fn)
template <typename TrueFn, typename FalseFn = fn_noop>
CMT_INTRIN decltype(auto) cif(cbool_t<true>, TrueFn&& truefn, FalseFn&& = FalseFn())
{
- return truefn(cbool<true>);
+ return truefn(ctrue); // true case: only truefn runs; ctrue takes the place of cbool<true> (defined outside this hunk)
}
template <typename TrueFn, typename FalseFn = fn_noop>
CMT_INTRIN decltype(auto) cif(cbool_t<false>, TrueFn&&, FalseFn&& falsefn = FalseFn())
{
- return falsefn(cbool<false>);
+ return falsefn(cfalse); // false case: only falsefn runs; cfalse takes the place of cbool<false> (defined outside this hunk)
}
template <typename T, T start, T stop, typename BodyFn>
CMT_INTRIN decltype(auto) cfor(cval_t<T, start>, cval_t<T, stop>, BodyFn&& bodyfn)
{
- return cforeach(cvalrange<T, start, stop>, std::forward<BodyFn>(bodyfn));
+ return cforeach(cvalseq_t<T, stop - start, start>(), std::forward<BodyFn>(bodyfn)); // same type as cvalrange<T, start, stop>: size = stop - start, first value = start
}
template <typename T, T... vs, typename U, typename Function, typename Fallback = fn_noop>
void cswitch(cvals_t<T, vs...>, const U& value, Function&& function, Fallback&& fallback = Fallback())
{
bool result = false;
- swallow{ (result = result || ((vs == value) ? (function(cval<T, vs>), void(), true) : false), void(),
+ swallow{ (result = result || ((vs == value) ? (function(cval_t<T, vs>()), void(), true) : false), void(),
0)... };
if (!result)
fallback();
@@ -1408,7 +1375,7 @@ CMT_INTRIN decltype(auto) cswitch(cvals_t<T, v0, values...>, identity<T> value,
{
if (cmpfn(value, v0))
{
- return fn(cval<T, v0>);
+ return fn(cval_t<T, v0>());
}
else
{
@@ -1442,7 +1409,7 @@ inline decltype(auto) cmatch_impl(T&& value, Fn1&& first, Fn2&& second, Fns&&...
{
using first_arg = typename function_arguments<Fn1>::template nth<0>;
constexpr bool is_same = std::is_same<decay<T>, decay<first_arg>>::value;
- return cmatch_impl2(cbool<is_same>, std::forward<T>(value), std::forward<Fn1>(first),
+ return cmatch_impl2(cbool_t<is_same>(), std::forward<T>(value), std::forward<Fn1>(first),
std::forward<Fn2>(second), std::forward<Fns>(rest)...);
}
@@ -1588,9 +1555,9 @@ constexpr inline ptrdiff_t distance(const void* x, const void* y)
return static_cast<const unsigned char*>(x) - static_cast<const unsigned char*>(y);
}
-#pragma GCC diagnostic push
+CMT_PRAGMA_GNU(GCC diagnostic push)
#if CMT_HAS_WARNING("-Wundefined-reinterpret-cast")
-#pragma GCC diagnostic ignored "-Wundefined-reinterpret-cast"
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wundefined-reinterpret-cast")
#endif
template <typename T, typename U>
@@ -1641,7 +1608,80 @@ CMT_INLINE constexpr static T implicit_cast(U&& value)
return std::forward<T>(value);
}
-#pragma GCC diagnostic pop
+#ifdef CMT_COMPILER_GNU // the variable-template shorthands below exist only when CMT_COMPILER_GNU is defined
+
+template <typename T, T val>
+constexpr cval_t<T, val> cval{}; // cval<T, v> is a value-initialised object of type cval_t<T, v>; the shorthands below follow the same pattern
+
+template <bool val>
+constexpr cbool_t<val> cbool{};
+
+template <int val>
+constexpr cint_t<val> cint{};
+
+template <unsigned val>
+constexpr cuint_t<val> cuint{};
+
+template <size_t val>
+constexpr csize_t<val> csize{};
+
+template <typename T, T... values>
+constexpr cvals_t<T, values...> cvals{}; // pack forms: one object per list of values
+
+template <bool... vals>
+constexpr cbools_t<vals...> cbools{};
+
+template <int... vals>
+constexpr cints_t<vals...> cints{};
+
+template <char... vals>
+constexpr cchars_t<vals...> cchars{};
+
+template <unsigned... vals>
+constexpr cuints_t<vals...> cuints{};
+
+template <size_t... vals>
+constexpr csizes_t<vals...> csizes{};
+
+template <size_t... vals>
+constexpr elements_t<vals...> elements{};
+
+template <typename T>
+constexpr ctype_t<T> ctype{}; // type tags: ctype for one type, ctypes for a list
+
+template <typename... Ts>
+constexpr ctypes_t<Ts...> ctypes{};
+
+template <typename T, T begin, T end>
+constexpr cvalseq_t<T, end - begin, begin> cvalrange{}; // range forms: size = end - begin, start = begin
+
+template <size_t begin, size_t end>
+constexpr cvalseq_t<size_t, end - begin, begin> csizerange{};
+
+template <int begin, int end>
+constexpr cvalseq_t<int, end - begin, begin> cintrange{};
+
+template <unsigned begin, unsigned end>
+constexpr cvalseq_t<unsigned, end - begin, begin> cuintrange{};
+
+template <typename T, size_t size, T start = T(), ptrdiff_t step = 1>
+constexpr cvalseq_t<T, size, start, step> cvalseq{}; // sequence forms: explicit size, with start and step defaulting to T() (0 for the typed variants) and 1
+
+template <size_t size, size_t start = 0, ptrdiff_t step = 1>
+constexpr cvalseq_t<size_t, size, start, step> csizeseq{};
+
+template <size_t size, int start = 0, ptrdiff_t step = 1>
+constexpr cvalseq_t<int, size, start, step> cintseq{};
+
+template <size_t size, unsigned start = 0, ptrdiff_t step = 1>
+constexpr cvalseq_t<unsigned, size, start, step> cuintseq{};
+template <typename... List>
+constexpr indicesfor_t<List...> indicesfor{};
+#endif // CMT_COMPILER_GNU
+
+CMT_PRAGMA_GNU(GCC diagnostic pop)
}
-#pragma GCC diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
+
+CMT_PRAGMA_MSVC(warning(pop))
diff --git a/include/kfr/cometa/array.hpp b/include/kfr/cometa/array.hpp
@@ -30,8 +30,13 @@ public:
constexpr array_ref() noexcept : m_data(nullptr), m_size(0) {}
constexpr array_ref(const array_ref&) noexcept = default;
constexpr array_ref(array_ref&&) noexcept = default;
+#ifdef CMT_COMPILER_GNU // constexpr noexcept defaulted assignment is kept only when CMT_COMPILER_GNU is defined
constexpr array_ref& operator=(const array_ref&) noexcept = default;
constexpr array_ref& operator=(array_ref&&) noexcept = default;
+#else // other compilers: the same defaulted assignments, declared without constexpr and noexcept
+ array_ref& operator=(const array_ref&) = default;
+ array_ref& operator=(array_ref&&) = default;
+#endif
template <size_t N>
constexpr array_ref(value_type (&arr)[N]) noexcept : m_data(arr), m_size(N)
diff --git a/include/kfr/cometa/cstring.hpp b/include/kfr/cometa/cstring.hpp
@@ -31,13 +31,13 @@ struct cstring
template <size_t start, size_t count>
constexpr cstring<count> slice(csize_t<start>, csize_t<count>) const noexcept
{
- return slice_impl(csizeseq<count, start>);
+ return slice_impl(csizeseq_t<count, start>()); // sequence arguments are (count, start), now passed as a temporary instead of the csizeseq variable template
}
template <size_t start>
constexpr cstring<N - start> slice(csize_t<start>) const noexcept
{
- return slice_impl(csizeseq<N - 1 - start, start>);
+ return slice_impl(csizeseq_t<N - 1 - start, start>()); // sequence arguments are (N - 1 - start, start): everything from start on, minus the last element of the N-sized string
}
constexpr friend bool operator==(const cstring& left, const cstring& right) noexcept
@@ -85,14 +85,13 @@ CMT_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<
const cstring<N2>& str2,
csizes_t<indices...>)
{
- constexpr size_t L1 = N1 - 1;
- return { { (indices < L1 ? str1[indices] : str2[indices - L1])..., 0 } };
+ return { { (indices < (N1 - 1) ? str1[indices] : str2[indices - (N1 - 1)])..., 0 } };
}
template <size_t N1, size_t N2, typename... Args>
CMT_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<N1>& str1,
const cstring<N2>& str2)
{
- return concat_str_impl(str1, str2, csizeseq<N1 - 1 + N2 - 1>);
+ return concat_str_impl(str1, str2, cvalseq_t<size_t, N1 - 1 + N2 - 1>()); // (N1 - 1) + (N2 - 1) indices; the indexed overload appends the final 0 itself
}
template <size_t N1, size_t Nfrom, size_t Nto, size_t... indices>
CMT_INTRIN cstring<N1 - Nfrom + Nto> str_replace_impl(size_t pos, const cstring<N1>& str,
@@ -125,7 +124,7 @@ CMT_INTRIN constexpr auto concat_cstring(const cstring<N1>& str1, const cstring<
template <size_t N>
CMT_INTRIN constexpr cstring<N> make_cstring(const char (&str)[N])
{
- return details::make_cstring_impl(str, csizeseq<N - 1>);
+ return details::make_cstring_impl(str, cvalseq_t<size_t, N - 1>()); // N - 1 indices: every array element except the last (presumably the terminating null - TODO confirm)
}
template <char... chars>
@@ -154,6 +153,7 @@ template <size_t N1, size_t Nfrom, size_t Nto>
CMT_INTRIN cstring<N1 - Nfrom + Nto> str_replace(const cstring<N1>& str, const cstring<Nfrom>& from,
const cstring<Nto>& to)
{
- return details::str_replace_impl(str_find(str, from), str, from, to, csizeseq<N1 - Nfrom + Nto - 1>);
+ return details::str_replace_impl(str_find(str, from), str, from, to,
+ cvalseq_t<size_t, N1 - Nfrom + Nto - 1>());
}
}
diff --git a/include/kfr/cometa/ctti.hpp b/include/kfr/cometa/ctti.hpp
@@ -45,9 +45,9 @@ constexpr cstring<Nout> gettypename_impl(const char* str, csizes_t<indices...>)
template <typename T>
constexpr auto ctype_name() noexcept
{
- constexpr size_t length =
- sizeof(CMT_FUNC_SIGNATURE) - 1 - details::typename_prefix - details::typename_postfix;
- return details::gettypename_impl(CMT_FUNC_SIGNATURE + details::typename_prefix, csizeseq<length>);
+ return details::gettypename_impl(CMT_FUNC_SIGNATURE + details::typename_prefix, // start typename_prefix characters into the signature string
+ csizeseq_t<(sizeof(CMT_FUNC_SIGNATURE) - 1 - details::typename_prefix - // length: signature size minus 1, minus prefix and postfix (formerly the local `length`)
+ details::typename_postfix)>());
}
/**
diff --git a/include/kfr/cometa/function.hpp b/include/kfr/cometa/function.hpp
@@ -143,7 +143,7 @@ inline function<Ret(Args...)> cdispatch(cvals_t<T, v0, values...>, identity<T> v
if (value == v0)
{
return [=](Args... args)
- CMT_INLINE_MEMBER -> Ret { return fn(cval<T, v0>, std::forward<Args>(args)...); };
+ CMT_INLINE_MEMBER -> Ret { return fn(cval_t<T, v0>(), std::forward<Args>(args)...); };
}
else
{
diff --git a/include/kfr/cometa/named_arg.hpp b/include/kfr/cometa/named_arg.hpp
@@ -5,6 +5,9 @@
#include "../cometa.hpp"
+CMT_PRAGMA_MSVC(warning(push))
+CMT_PRAGMA_MSVC(warning(disable : 4814))
+
namespace cometa
{
template <typename T>
@@ -28,3 +31,5 @@ struct named
inline named operator""_arg(const char* name, size_t) { return name; }
}
+
+CMT_PRAGMA_MSVC(warning(pop))
diff --git a/include/kfr/cometa/string.hpp b/include/kfr/cometa/string.hpp
@@ -12,10 +12,10 @@
#include <string>
#include <utility>
-#pragma GCC diagnostic push
+CMT_PRAGMA_GNU(GCC diagnostic push)
#if CMT_HAS_WARNING("-Wformat-security")
-#pragma GCC diagnostic ignored "-Wformat-security"
-#pragma GCC diagnostic ignored "-Wused-but-marked-unused"
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wformat-security")
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wused-but-marked-unused")
#endif
namespace cometa
@@ -24,11 +24,12 @@ namespace cometa
template <typename T>
struct representation
{
+ using type = T; // representation type for T (T itself in the primary template); repr_type reads this member
static constexpr const T& get(const T& value) noexcept { return value; }
};
template <typename T>
-using repr_type = decay<decltype(representation<T>::get(std::declval<T>()))>;
+using repr_type = typename representation<T>::type; // read from the explicit member alias instead of deducing the return type of get()
template <typename... Args>
CMT_INLINE std::string as_string(const Args&... args);
@@ -102,7 +103,7 @@ CMT_INLINE constexpr auto value_fmt(ctype_t<const void*>) { return make_cstring(
template <char... chars>
CMT_INLINE constexpr auto value_fmt(ctype_t<cchars_t<chars...>>)
{
- return concat_cstring(make_cstring("%s"), make_cstring(cchars<chars...>));
+ return concat_cstring(make_cstring("%s"), make_cstring(cchars_t<chars...>())); // "%s" concatenated with the string made from the character pack
}
template <typename T>
@@ -115,7 +116,7 @@ template <typename T, int width, int prec>
CMT_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, static_cast<char>(-1), width, prec>> fmt)
{
return concat_cstring(make_cstring("%"), value_fmt_arg(fmt),
- value_fmt(ctype<repr_type<T>>).slice(csize<1>));
+ value_fmt(ctype_t<repr_type<T>>()).slice(csize_t<1>()));
}
template <typename T, char t, int width, int prec>
CMT_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, t, width, prec>> fmt)
@@ -188,7 +189,7 @@ CMT_INLINE constexpr cstring<N1 - 3 + Nnew> fmt_replace_impl(const cstring<N1>&
template <size_t N1, size_t Nto>
CMT_INLINE constexpr cstring<N1 - 3 + Nto> fmt_replace(const cstring<N1>& str, const cstring<Nto>& newfmt)
{
- return fmt_replace_impl(str, newfmt, csizeseq<N1 - 3 + Nto - 1>);
+ return fmt_replace_impl(str, newfmt, csizeseq_t<N1 - 3 + Nto - 1>()); // sequence size is the result size (N1 - 3 + Nto) minus one
}
inline std::string replace_one(const std::string& str, const std::string& from, const std::string& to)
@@ -207,8 +208,8 @@ CMT_INLINE const std::string& build_fmt(const std::string& str, ctypes_t<>) { re
template <typename Arg, typename... Args>
CMT_INLINE auto build_fmt(const std::string& str, ctypes_t<Arg, Args...>)
{
- constexpr auto fmt = value_fmt(ctype<decay<Arg>>);
- return build_fmt(replace_one(str, "{}", std::string(fmt.data())), ctypes<Args...>);
+ constexpr auto fmt = value_fmt(ctype_t<decay<Arg>>()); // format specifier for the first (decayed) argument type
+ return build_fmt(replace_one(str, "{}", std::string(fmt.data())), ctypes_t<Args...>()); // put it in place of a "{}" via replace_one, then recurse on the remaining types
}
}
@@ -224,22 +225,24 @@ CMT_INLINE details::fmt_t<T, static_cast<char>(-1), width, prec> fmtwidth(const
return { value };
}
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wgnu-string-literal-operator-template"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wpragmas")
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wgnu-string-literal-operator-template")
constexpr auto build_fmt_str(cchars_t<>, ctypes_t<>) { return make_cstring(""); }
template <char... chars, typename Arg, typename... Args>
constexpr auto build_fmt_str(cchars_t<'@', chars...>, ctypes_t<Arg, Args...>)
{
- return concat_cstring(details::value_fmt(ctype<decay<Arg>>),
- build_fmt_str(cchars<chars...>, ctypes<Args...>));
+ return concat_cstring(details::value_fmt(ctype_t<decay<Arg>>()), // a leading '@' consumes one argument type and emits its format specifier
+ build_fmt_str(cchars_t<chars...>(), ctypes_t<Args...>())); // the rest of the characters and types are handled recursively
}
template <char ch, char... chars, typename... Args>
constexpr auto build_fmt_str(cchars_t<ch, chars...>, ctypes_t<Args...>)
{
- return concat_cstring(make_cstring(cchars<ch>), build_fmt_str(cchars<chars...>, ctypes<Args...>));
+ return concat_cstring(make_cstring(cchars_t<ch>()), // the leading character ch is copied into the result; no argument type is consumed
+ build_fmt_str(cchars_t<chars...>(), ctypes_t<Args...>())); // recursion continues with the remaining characters and the same types
}
template <char... chars>
@@ -248,7 +251,7 @@ struct format_t
template <typename... Args>
inline std::string operator()(const Args&... args)
{
- constexpr auto format_str = build_fmt_str(cchars<chars...>, ctypes<repr_type<Args>...>);
+ constexpr auto format_str = build_fmt_str(cchars_t<chars...>(), ctypes_t<repr_type<Args>...>());
std::string result;
const int size = std::snprintf(nullptr, 0, format_str.data(), details::pack_value(args)...);
@@ -267,12 +270,14 @@ struct print_t
template <typename... Args>
CMT_INLINE void operator()(const Args&... args)
{
- constexpr auto format_str = build_fmt_str(cchars<chars...>, ctypes<repr_type<Args>...>);
+ constexpr auto format_str = build_fmt_str(cchars_t<chars...>(), ctypes_t<repr_type<Args>...>()); // printf format built at compile time from the literal's characters and the arguments' representation types
std::printf(format_str.data(), details::pack_value(args)...);
}
};
+#ifdef CMT_COMPILER_GNU
+
template <typename Char, Char... chars>
constexpr format_t<chars...> operator""_format()
{
@@ -285,26 +290,28 @@ constexpr CMT_INLINE print_t<chars...> operator""_print()
return {};
}
-#pragma GCC diagnostic pop
+#endif
+
+CMT_PRAGMA_GNU(GCC diagnostic pop)
template <typename... Args>
CMT_INLINE void printfmt(const std::string& fmt, const Args&... args)
{
- const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>);
+ const auto format_str = details::build_fmt(fmt, ctypes_t<repr_type<Args>...>()); // run-time format string derived from fmt and the arguments' representation types
std::printf(format_str.data(), details::pack_value(representation<Args>::get(args))...);
}
template <typename... Args>
CMT_INLINE void fprintfmt(FILE* f, const std::string& fmt, const Args&... args)
{
- const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>);
+ const auto format_str = details::build_fmt(fmt, ctypes_t<repr_type<Args>...>()); // run-time format string derived from fmt and the arguments' representation types
std::fprintf(f, format_str.data(), details::pack_value(representation<Args>::get(args))...);
}
template <typename... Args>
CMT_INLINE int snprintfmt(char* str, size_t size, const std::string& fmt, const Args&... args)
{
- const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>);
+ const auto format_str = details::build_fmt(fmt, ctypes_t<repr_type<Args>...>()); // run-time format string derived from fmt and the arguments' representation types
return std::snprintf(str, size, format_str.data(),
details::pack_value(representation<Args>::get(args))...);
}
@@ -313,7 +320,7 @@ template <typename... Args>
CMT_INLINE std::string format(const std::string& fmt, const Args&... args)
{
std::string result;
- const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>);
+ const auto format_str = details::build_fmt(fmt, ctypes_t<repr_type<Args>...>());
const int size =
std::snprintf(nullptr, 0, format_str.data(), details::pack_value(representation<Args>::get(args))...);
if (size <= 0)
@@ -329,22 +336,24 @@ namespace details
template <typename T>
constexpr auto get_value_fmt()
{
- return details::value_fmt(ctype<decay<repr_type<T>>>);
+ return details::value_fmt(ctype_t<decay<repr_type<T>>>()); // format specifier for the decayed representation type of T
}
}
template <typename... Args>
CMT_INLINE void print(const Args&... args)
{
- constexpr auto format_str = concat_cstring(details::get_value_fmt<Args>()...);
- std::printf(format_str.data(), details::pack_value(representation<Args>::get(args))...);
+ constexpr const auto format_str = concat_cstring(details::get_value_fmt<Args>()...); // compile-time format string: the arguments' specifiers concatenated in order
+ const char* str = format_str.data(); // pointer is taken in a separate statement before the call (presumably a compiler workaround - TODO confirm)
+ std::printf(str, details::pack_value(representation<Args>::get(args))...); // each argument is converted via representation<Args>::get and pack_value
}
template <typename... Args>
CMT_INLINE void println(const Args&... args)
{
- constexpr auto format_str = concat_cstring(details::get_value_fmt<Args>()..., make_cstring("\n"));
- std::printf(format_str.data(), details::pack_value(representation<Args>::get(args))...);
+ constexpr const auto format_str = concat_cstring(details::get_value_fmt<Args>()..., make_cstring("\n")); // same as print, with "\n" appended to the format string
+ const char* str = format_str.data(); // pointer is taken in a separate statement before the call (presumably a compiler workaround - TODO confirm)
+ std::printf(str, details::pack_value(representation<Args>::get(args))...); // each argument is converted via representation<Args>::get and pack_value
}
template <typename... Args>
@@ -352,13 +361,13 @@ CMT_INLINE std::string as_string(const Args&... args)
{
std::string result;
constexpr auto format_str = concat_cstring(details::get_value_fmt<Args>()...);
+ const char* str = format_str.data();
- const int size =
- std::snprintf(nullptr, 0, format_str.data(), details::pack_value(representation<Args>::get(args))...);
+ const int size = std::snprintf(nullptr, 0, str, details::pack_value(representation<Args>::get(args))...);
if (size <= 0)
return result;
result.resize(size_t(size + 1));
- result.resize(size_t(std::snprintf(&result[0], size_t(size + 1), format_str.data(),
+ result.resize(size_t(std::snprintf(&result[0], size_t(size + 1), str,
details::pack_value(representation<Args>::get(args))...)));
return result;
}
@@ -402,6 +411,7 @@ inline std::string join(T x, U y, Ts... rest)
template <typename T>
struct representation<named_arg<T>>
{
+ using type = std::string;
static std::string get(const named_arg<T>& value)
{
return std::string(value.name) + " = " + as_string(value.value);
@@ -411,6 +421,7 @@ struct representation<named_arg<T>>
template <typename T1, typename T2>
struct representation<std::pair<T1, T2>>
{
+ using type = std::string;
static std::string get(const std::pair<T1, T2>& value)
{
return "(" + as_string(value.first) + "; " + as_string(value.second) + ")";
@@ -418,4 +429,4 @@ struct representation<std::pair<T1, T2>>
};
}
-#pragma GCC diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/data/sincos.hpp b/include/kfr/data/sincos.hpp
@@ -32,82 +32,160 @@ namespace data
{
// data generated by mpfr
-template <typename T>
-constexpr T c_sin_table[64] = {
- /* sin(2*pi* 0/ 256) */ T(0.0),
- /* sin(2*pi* 1/ 256) */ T(0.02454122852291228803173452945928292506547),
- /* sin(2*pi* 2/ 256) */ T(0.04906767432741801425495497694268265831475),
- /* sin(2*pi* 3/ 256) */ T(0.0735645635996674235294656215752343218133),
- /* sin(2*pi* 4/ 256) */ T(0.09801714032956060199419556388864184586114),
- /* sin(2*pi* 5/ 256) */ T(0.1224106751992161984987044741509457875752),
- /* sin(2*pi* 6/ 256) */ T(0.1467304744553617516588501296467178197062),
- /* sin(2*pi* 7/ 256) */ T(0.1709618887603012263636423572082635319663),
- /* sin(2*pi* 8/ 256) */ T(0.1950903220161282678482848684770222409277),
- /* sin(2*pi* 9/ 256) */ T(0.2191012401568697972277375474973577988484),
- /* sin(2*pi* 10/ 256) */ T(0.242980179903263889948274162077471118321),
- /* sin(2*pi* 11/ 256) */ T(0.2667127574748983863252865151164363940421),
- /* sin(2*pi* 12/ 256) */ T(0.2902846772544623676361923758173952746915),
- /* sin(2*pi* 13/ 256) */ T(0.3136817403988914766564788459941003099934),
- /* sin(2*pi* 14/ 256) */ T(0.3368898533922200506892532126191475704778),
- /* sin(2*pi* 15/ 256) */ T(0.3598950365349881487751045723267564202023),
- /* sin(2*pi* 16/ 256) */ T(0.3826834323650897717284599840303988667613),
- /* sin(2*pi* 17/ 256) */ T(0.4052413140049898709084813055050524665119),
- /* sin(2*pi* 18/ 256) */ T(0.4275550934302820943209668568887985343046),
- /* sin(2*pi* 19/ 256) */ T(0.4496113296546066000462945794242270758832),
- /* sin(2*pi* 20/ 256) */ T(0.4713967368259976485563876259052543776575),
- /* sin(2*pi* 21/ 256) */ T(0.4928981922297840368730266887588092682397),
- /* sin(2*pi* 22/ 256) */ T(0.514102744193221726593693838968815772608),
- /* sin(2*pi* 23/ 256) */ T(0.5349976198870972106630769046370179155603),
- /* sin(2*pi* 24/ 256) */ T(0.5555702330196022247428308139485328743749),
- /* sin(2*pi* 25/ 256) */ T(0.575808191417845300745972453815730841776),
- /* sin(2*pi* 26/ 256) */ T(0.5956993044924333434670365288299698895119),
- /* sin(2*pi* 27/ 256) */ T(0.6152315905806268454849135634139842776594),
- /* sin(2*pi* 28/ 256) */ T(0.6343932841636454982151716132254933706757),
- /* sin(2*pi* 29/ 256) */ T(0.6531728429537767640842030136563054150769),
- /* sin(2*pi* 30/ 256) */ T(0.6715589548470184006253768504274218032288),
- /* sin(2*pi* 31/ 256) */ T(0.6895405447370669246167306299574847028455),
- /* sin(2*pi* 32/ 256) */ T(0.7071067811865475244008443621048490392848),
- /* sin(2*pi* 33/ 256) */ T(0.7242470829514669209410692432905531674831),
- /* sin(2*pi* 34/ 256) */ T(0.740951125354959091175616897495162729729),
- /* sin(2*pi* 35/ 256) */ T(0.7572088465064845475754640536057844730404),
- /* sin(2*pi* 36/ 256) */ T(0.773010453362736960810906609758469800971),
- /* sin(2*pi* 37/ 256) */ T(0.7883464276266062620091647053596892826565),
- /* sin(2*pi* 38/ 256) */ T(0.8032075314806449098066765129631419238796),
- /* sin(2*pi* 39/ 256) */ T(0.817584813151583696504920884130633809471),
- /* sin(2*pi* 40/ 256) */ T(0.8314696123025452370787883776179057567386),
- /* sin(2*pi* 41/ 256) */ T(0.8448535652497070732595712051049570977198),
- /* sin(2*pi* 42/ 256) */ T(0.8577286100002720699022699842847701370425),
- /* sin(2*pi* 43/ 256) */ T(0.8700869911087114186522924044838488439108),
- /* sin(2*pi* 44/ 256) */ T(0.8819212643483550297127568636603883495084),
- /* sin(2*pi* 45/ 256) */ T(0.8932243011955153203424164474933979780006),
- /* sin(2*pi* 46/ 256) */ T(0.9039892931234433315862002972305370487101),
- /* sin(2*pi* 47/ 256) */ T(0.9142097557035306546350148293935774010447),
- /* sin(2*pi* 48/ 256) */ T(0.9238795325112867561281831893967882868224),
- /* sin(2*pi* 49/ 256) */ T(0.932992798834738887711660255543302498295),
- /* sin(2*pi* 50/ 256) */ T(0.9415440651830207784125094025995023571856),
- /* sin(2*pi* 51/ 256) */ T(0.9495281805930366671959360741893450282522),
- /* sin(2*pi* 52/ 256) */ T(0.9569403357322088649357978869802699694828),
- /* sin(2*pi* 53/ 256) */ T(0.9637760657954398666864643555078351536631),
- /* sin(2*pi* 54/ 256) */ T(0.9700312531945439926039842072861002514569),
- /* sin(2*pi* 55/ 256) */ T(0.975702130038528544460395766419527971644),
- /* sin(2*pi* 56/ 256) */ T(0.9807852804032304491261822361342390369739),
- /* sin(2*pi* 57/ 256) */ T(0.9852776423889412447740184331785477871601),
- /* sin(2*pi* 58/ 256) */ T(0.9891765099647809734516737380162430639837),
- /* sin(2*pi* 59/ 256) */ T(0.9924795345987099981567672516611178200108),
- /* sin(2*pi* 60/ 256) */ T(0.9951847266721968862448369531094799215755),
- /* sin(2*pi* 61/ 256) */ T(0.9972904566786902161355971401825678211717),
- /* sin(2*pi* 62/ 256) */ T(0.9987954562051723927147716047591006944432),
- /* sin(2*pi* 63/ 256) */ T(0.9996988186962042201157656496661721968501)
+constexpr f32 c_sin_table_f32[64] = {
+ /* sin(2*pi* 0/ 256) */ f32(0.0),
+ /* sin(2*pi* 1/ 256) */ f32(0.02454122852291228803173452945928292506547),
+ /* sin(2*pi* 2/ 256) */ f32(0.04906767432741801425495497694268265831475),
+ /* sin(2*pi* 3/ 256) */ f32(0.0735645635996674235294656215752343218133),
+ /* sin(2*pi* 4/ 256) */ f32(0.09801714032956060199419556388864184586114),
+ /* sin(2*pi* 5/ 256) */ f32(0.1224106751992161984987044741509457875752),
+ /* sin(2*pi* 6/ 256) */ f32(0.1467304744553617516588501296467178197062),
+ /* sin(2*pi* 7/ 256) */ f32(0.1709618887603012263636423572082635319663),
+ /* sin(2*pi* 8/ 256) */ f32(0.1950903220161282678482848684770222409277),
+ /* sin(2*pi* 9/ 256) */ f32(0.2191012401568697972277375474973577988484),
+ /* sin(2*pi* 10/ 256) */ f32(0.242980179903263889948274162077471118321),
+ /* sin(2*pi* 11/ 256) */ f32(0.2667127574748983863252865151164363940421),
+ /* sin(2*pi* 12/ 256) */ f32(0.2902846772544623676361923758173952746915),
+ /* sin(2*pi* 13/ 256) */ f32(0.3136817403988914766564788459941003099934),
+ /* sin(2*pi* 14/ 256) */ f32(0.3368898533922200506892532126191475704778),
+ /* sin(2*pi* 15/ 256) */ f32(0.3598950365349881487751045723267564202023),
+ /* sin(2*pi* 16/ 256) */ f32(0.3826834323650897717284599840303988667613),
+ /* sin(2*pi* 17/ 256) */ f32(0.4052413140049898709084813055050524665119),
+ /* sin(2*pi* 18/ 256) */ f32(0.4275550934302820943209668568887985343046),
+ /* sin(2*pi* 19/ 256) */ f32(0.4496113296546066000462945794242270758832),
+ /* sin(2*pi* 20/ 256) */ f32(0.4713967368259976485563876259052543776575),
+ /* sin(2*pi* 21/ 256) */ f32(0.4928981922297840368730266887588092682397),
+ /* sin(2*pi* 22/ 256) */ f32(0.514102744193221726593693838968815772608),
+ /* sin(2*pi* 23/ 256) */ f32(0.5349976198870972106630769046370179155603),
+ /* sin(2*pi* 24/ 256) */ f32(0.5555702330196022247428308139485328743749),
+ /* sin(2*pi* 25/ 256) */ f32(0.575808191417845300745972453815730841776),
+ /* sin(2*pi* 26/ 256) */ f32(0.5956993044924333434670365288299698895119),
+ /* sin(2*pi* 27/ 256) */ f32(0.6152315905806268454849135634139842776594),
+ /* sin(2*pi* 28/ 256) */ f32(0.6343932841636454982151716132254933706757),
+ /* sin(2*pi* 29/ 256) */ f32(0.6531728429537767640842030136563054150769),
+ /* sin(2*pi* 30/ 256) */ f32(0.6715589548470184006253768504274218032288),
+ /* sin(2*pi* 31/ 256) */ f32(0.6895405447370669246167306299574847028455),
+ /* sin(2*pi* 32/ 256) */ f32(0.7071067811865475244008443621048490392848),
+ /* sin(2*pi* 33/ 256) */ f32(0.7242470829514669209410692432905531674831),
+ /* sin(2*pi* 34/ 256) */ f32(0.740951125354959091175616897495162729729),
+ /* sin(2*pi* 35/ 256) */ f32(0.7572088465064845475754640536057844730404),
+ /* sin(2*pi* 36/ 256) */ f32(0.773010453362736960810906609758469800971),
+ /* sin(2*pi* 37/ 256) */ f32(0.7883464276266062620091647053596892826565),
+ /* sin(2*pi* 38/ 256) */ f32(0.8032075314806449098066765129631419238796),
+ /* sin(2*pi* 39/ 256) */ f32(0.817584813151583696504920884130633809471),
+ /* sin(2*pi* 40/ 256) */ f32(0.8314696123025452370787883776179057567386),
+ /* sin(2*pi* 41/ 256) */ f32(0.8448535652497070732595712051049570977198),
+ /* sin(2*pi* 42/ 256) */ f32(0.8577286100002720699022699842847701370425),
+ /* sin(2*pi* 43/ 256) */ f32(0.8700869911087114186522924044838488439108),
+ /* sin(2*pi* 44/ 256) */ f32(0.8819212643483550297127568636603883495084),
+ /* sin(2*pi* 45/ 256) */ f32(0.8932243011955153203424164474933979780006),
+ /* sin(2*pi* 46/ 256) */ f32(0.9039892931234433315862002972305370487101),
+ /* sin(2*pi* 47/ 256) */ f32(0.9142097557035306546350148293935774010447),
+ /* sin(2*pi* 48/ 256) */ f32(0.9238795325112867561281831893967882868224),
+ /* sin(2*pi* 49/ 256) */ f32(0.932992798834738887711660255543302498295),
+ /* sin(2*pi* 50/ 256) */ f32(0.9415440651830207784125094025995023571856),
+ /* sin(2*pi* 51/ 256) */ f32(0.9495281805930366671959360741893450282522),
+ /* sin(2*pi* 52/ 256) */ f32(0.9569403357322088649357978869802699694828),
+ /* sin(2*pi* 53/ 256) */ f32(0.9637760657954398666864643555078351536631),
+ /* sin(2*pi* 54/ 256) */ f32(0.9700312531945439926039842072861002514569),
+ /* sin(2*pi* 55/ 256) */ f32(0.975702130038528544460395766419527971644),
+ /* sin(2*pi* 56/ 256) */ f32(0.9807852804032304491261822361342390369739),
+ /* sin(2*pi* 57/ 256) */ f32(0.9852776423889412447740184331785477871601),
+ /* sin(2*pi* 58/ 256) */ f32(0.9891765099647809734516737380162430639837),
+ /* sin(2*pi* 59/ 256) */ f32(0.9924795345987099981567672516611178200108),
+ /* sin(2*pi* 60/ 256) */ f32(0.9951847266721968862448369531094799215755),
+ /* sin(2*pi* 61/ 256) */ f32(0.9972904566786902161355971401825678211717),
+ /* sin(2*pi* 62/ 256) */ f32(0.9987954562051723927147716047591006944432),
+ /* sin(2*pi* 63/ 256) */ f32(0.9996988186962042201157656496661721968501)
+};
+
+// data generated by mpfr
+constexpr f64 c_sin_table_f64[64] = {
+ /* sin(2*pi* 0/ 256) */ f64(0.0),
+ /* sin(2*pi* 1/ 256) */ f64(0.02454122852291228803173452945928292506547),
+ /* sin(2*pi* 2/ 256) */ f64(0.04906767432741801425495497694268265831475),
+ /* sin(2*pi* 3/ 256) */ f64(0.0735645635996674235294656215752343218133),
+ /* sin(2*pi* 4/ 256) */ f64(0.09801714032956060199419556388864184586114),
+ /* sin(2*pi* 5/ 256) */ f64(0.1224106751992161984987044741509457875752),
+ /* sin(2*pi* 6/ 256) */ f64(0.1467304744553617516588501296467178197062),
+ /* sin(2*pi* 7/ 256) */ f64(0.1709618887603012263636423572082635319663),
+ /* sin(2*pi* 8/ 256) */ f64(0.1950903220161282678482848684770222409277),
+ /* sin(2*pi* 9/ 256) */ f64(0.2191012401568697972277375474973577988484),
+ /* sin(2*pi* 10/ 256) */ f64(0.242980179903263889948274162077471118321),
+ /* sin(2*pi* 11/ 256) */ f64(0.2667127574748983863252865151164363940421),
+ /* sin(2*pi* 12/ 256) */ f64(0.2902846772544623676361923758173952746915),
+ /* sin(2*pi* 13/ 256) */ f64(0.3136817403988914766564788459941003099934),
+ /* sin(2*pi* 14/ 256) */ f64(0.3368898533922200506892532126191475704778),
+ /* sin(2*pi* 15/ 256) */ f64(0.3598950365349881487751045723267564202023),
+ /* sin(2*pi* 16/ 256) */ f64(0.3826834323650897717284599840303988667613),
+ /* sin(2*pi* 17/ 256) */ f64(0.4052413140049898709084813055050524665119),
+ /* sin(2*pi* 18/ 256) */ f64(0.4275550934302820943209668568887985343046),
+ /* sin(2*pi* 19/ 256) */ f64(0.4496113296546066000462945794242270758832),
+ /* sin(2*pi* 20/ 256) */ f64(0.4713967368259976485563876259052543776575),
+ /* sin(2*pi* 21/ 256) */ f64(0.4928981922297840368730266887588092682397),
+ /* sin(2*pi* 22/ 256) */ f64(0.514102744193221726593693838968815772608),
+ /* sin(2*pi* 23/ 256) */ f64(0.5349976198870972106630769046370179155603),
+ /* sin(2*pi* 24/ 256) */ f64(0.5555702330196022247428308139485328743749),
+ /* sin(2*pi* 25/ 256) */ f64(0.575808191417845300745972453815730841776),
+ /* sin(2*pi* 26/ 256) */ f64(0.5956993044924333434670365288299698895119),
+ /* sin(2*pi* 27/ 256) */ f64(0.6152315905806268454849135634139842776594),
+ /* sin(2*pi* 28/ 256) */ f64(0.6343932841636454982151716132254933706757),
+ /* sin(2*pi* 29/ 256) */ f64(0.6531728429537767640842030136563054150769),
+ /* sin(2*pi* 30/ 256) */ f64(0.6715589548470184006253768504274218032288),
+ /* sin(2*pi* 31/ 256) */ f64(0.6895405447370669246167306299574847028455),
+ /* sin(2*pi* 32/ 256) */ f64(0.7071067811865475244008443621048490392848),
+ /* sin(2*pi* 33/ 256) */ f64(0.7242470829514669209410692432905531674831),
+ /* sin(2*pi* 34/ 256) */ f64(0.740951125354959091175616897495162729729),
+ /* sin(2*pi* 35/ 256) */ f64(0.7572088465064845475754640536057844730404),
+ /* sin(2*pi* 36/ 256) */ f64(0.773010453362736960810906609758469800971),
+ /* sin(2*pi* 37/ 256) */ f64(0.7883464276266062620091647053596892826565),
+ /* sin(2*pi* 38/ 256) */ f64(0.8032075314806449098066765129631419238796),
+ /* sin(2*pi* 39/ 256) */ f64(0.817584813151583696504920884130633809471),
+ /* sin(2*pi* 40/ 256) */ f64(0.8314696123025452370787883776179057567386),
+ /* sin(2*pi* 41/ 256) */ f64(0.8448535652497070732595712051049570977198),
+ /* sin(2*pi* 42/ 256) */ f64(0.8577286100002720699022699842847701370425),
+ /* sin(2*pi* 43/ 256) */ f64(0.8700869911087114186522924044838488439108),
+ /* sin(2*pi* 44/ 256) */ f64(0.8819212643483550297127568636603883495084),
+ /* sin(2*pi* 45/ 256) */ f64(0.8932243011955153203424164474933979780006),
+ /* sin(2*pi* 46/ 256) */ f64(0.9039892931234433315862002972305370487101),
+ /* sin(2*pi* 47/ 256) */ f64(0.9142097557035306546350148293935774010447),
+ /* sin(2*pi* 48/ 256) */ f64(0.9238795325112867561281831893967882868224),
+ /* sin(2*pi* 49/ 256) */ f64(0.932992798834738887711660255543302498295),
+ /* sin(2*pi* 50/ 256) */ f64(0.9415440651830207784125094025995023571856),
+ /* sin(2*pi* 51/ 256) */ f64(0.9495281805930366671959360741893450282522),
+ /* sin(2*pi* 52/ 256) */ f64(0.9569403357322088649357978869802699694828),
+ /* sin(2*pi* 53/ 256) */ f64(0.9637760657954398666864643555078351536631),
+ /* sin(2*pi* 54/ 256) */ f64(0.9700312531945439926039842072861002514569),
+ /* sin(2*pi* 55/ 256) */ f64(0.975702130038528544460395766419527971644),
+ /* sin(2*pi* 56/ 256) */ f64(0.9807852804032304491261822361342390369739),
+ /* sin(2*pi* 57/ 256) */ f64(0.9852776423889412447740184331785477871601),
+ /* sin(2*pi* 58/ 256) */ f64(0.9891765099647809734516737380162430639837),
+ /* sin(2*pi* 59/ 256) */ f64(0.9924795345987099981567672516611178200108),
+ /* sin(2*pi* 60/ 256) */ f64(0.9951847266721968862448369531094799215755),
+ /* sin(2*pi* 61/ 256) */ f64(0.9972904566786902161355971401825678211717),
+ /* sin(2*pi* 62/ 256) */ f64(0.9987954562051723927147716047591006944432),
+ /* sin(2*pi* 63/ 256) */ f64(0.9996988186962042201157656496661721968501)
};
}
 template <typename T>
-constexpr inline T sin_using_table_256(size_t k)
+constexpr inline T sin_using_table_256(size_t k);
+// The primary template is only declared; f32 and f64 are supplied as explicit specializations.
+
+// f32 lookup: k == 0 and k == 128 give 0, k == 64 gives 1, k > 128 returns the negated
+// value at k - 128, 64 < k < 128 returns the value at 128 - k, and the remaining
+// k (1..63) index data::c_sin_table_f32 directly.
+template <>
+constexpr inline f32 sin_using_table_256<f32>(size_t k)
+{
+ return (k == 0 || k == 128) ? 0.f : (k == 64) ? 1.f : (k > 128)
+ ? -sin_using_table_256<f32>(k - 128)
+ : (k > 64) ? sin_using_table_256<f32>(128 - k)
+ : data::c_sin_table_f32[k];
+}
+// f64 lookup: same folding as the f32 version, reading data::c_sin_table_f64, whose
+// entries are annotated as sin(2*pi*k/256).
+template <>
+constexpr inline f64 sin_using_table_256<f64>(size_t k)
 {
- return (k == 0 || k == 128) ? T(0) : (k == 64) ? T(1) : (k > 128)
- ? -sin_using_table_256<T>(k - 128)
- : (k > 64) ? sin_using_table_256<T>(128 - k)
- : data::c_sin_table<T>[k];
+ return (k == 0 || k == 128) ? 0.0 : (k == 64) ? 1.0 : (k > 128)
+ ? -sin_using_table_256<f64>(k - 128)
+ : (k > 64) ? sin_using_table_256<f64>(128 - k)
+ : data::c_sin_table_f64[k];
 }
template <typename T>
diff --git a/include/kfr/dft/bitrev.hpp b/include/kfr/dft/bitrev.hpp
@@ -43,7 +43,7 @@ namespace internal
constexpr bool fft_reorder_aligned = false;
template <size_t Bits>
-constexpr inline u32 bitrev_using_table(u32 x)
+CMT_GNU_CONSTEXPR inline u32 bitrev_using_table(u32 x)
{
constexpr size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev_table));
if (Bits > bitrev_table_log2N)
@@ -52,7 +52,7 @@ constexpr inline u32 bitrev_using_table(u32 x)
return data::bitrev_table[x] >> (bitrev_table_log2N - Bits);
}
-constexpr inline u32 bitrev_using_table(u32 x, size_t bits)
+CMT_GNU_CONSTEXPR inline u32 bitrev_using_table(u32 x, size_t bits)
{
constexpr size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev_table));
if (bits > bitrev_table_log2N)
@@ -61,7 +61,7 @@ constexpr inline u32 bitrev_using_table(u32 x, size_t bits)
return data::bitrev_table[x] >> (bitrev_table_log2N - bits);
}
-constexpr inline u32 dig4rev_using_table(u32 x, size_t bits)
+CMT_GNU_CONSTEXPR inline u32 dig4rev_using_table(u32 x, size_t bits)
{
constexpr size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev_table));
if (bits > bitrev_table_log2N)
@@ -253,10 +253,10 @@ KFR_INTRIN void fft_reorder(complex<T>* inout, csize_t<9>)
}
 template <typename T, bool use_br2>
-void cwrite_reordered(T* out, cvec<T, 16> value, size_t N4, cbool_t<use_br2>)
+void cwrite_reordered(T* out, const cvec<T, 16>& value, size_t N4, cbool_t<use_br2>)
 {
- value = digitreverse < use_br2 ? 2 : 4, 2 > (value);
- cwrite_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(out), N4, value);
+ // Stores digitreverse<2, 2>(value) when use_br2 is set and digitreverse<4, 2>(value)
+ // otherwise, through cwrite_group<4, 4, fft_reorder_aligned> at `out`, forwarding N4.
+ // `value` is a const reference, so the reordered vector is passed directly to the
+ // store instead of being assigned back to the parameter.
+ cwrite_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(out), N4,
+ digitreverse<(use_br2 ? 2 : 4), 2>(value));
 }
template <typename T, bool use_br2>
@@ -265,8 +265,8 @@ KFR_INTRIN void fft_reorder_swap_n4(T* inout, size_t i, size_t j, size_t N4, cbo
CMT_ASSUME(i != j);
const cvec<T, 16> vi = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), N4);
const cvec<T, 16> vj = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), N4);
- cwrite_reordered(inout + j, vi, N4, cbool<use_br2>);
- cwrite_reordered(inout + i, vj, N4, cbool<use_br2>);
+ cwrite_reordered(inout + j, vi, N4, cbool_t<use_br2>());
+ cwrite_reordered(inout + i, vj, N4, cbool_t<use_br2>());
}
template <typename T>
@@ -316,7 +316,7 @@ KFR_INTRIN void fft_reorder(complex<T>* inout, size_t log2n, cfalse_t use_br2)
T* io = ptr_cast<T>(inout);
size_t i = 0;
-#pragma clang loop unroll_count(2)
+ CMT_PRAGMA_CLANG(clang loop unroll_count(2))
for (; i < iend;)
{
size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
@@ -326,7 +326,7 @@ KFR_INTRIN void fft_reorder(complex<T>* inout, size_t log2n, cfalse_t use_br2)
i += istep * 4;
}
iend += N16;
-#pragma clang loop unroll_count(2)
+ CMT_PRAGMA_CLANG(clang loop unroll_count(2))
for (; i < iend;)
{
size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
@@ -341,7 +341,7 @@ KFR_INTRIN void fft_reorder(complex<T>* inout, size_t log2n, cfalse_t use_br2)
i += istep * 3;
}
iend += N16;
-#pragma clang loop unroll_count(2)
+ CMT_PRAGMA_CLANG(clang loop unroll_count(2))
for (; i < iend;)
{
size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
@@ -361,7 +361,7 @@ KFR_INTRIN void fft_reorder(complex<T>* inout, size_t log2n, cfalse_t use_br2)
i += istep * 2;
}
iend += N16;
-#pragma clang loop unroll_count(2)
+ CMT_PRAGMA_CLANG(clang loop unroll_count(2))
for (; i < iend;)
{
size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
diff --git a/include/kfr/dft/cache.hpp b/include/kfr/dft/cache.hpp
@@ -39,11 +39,12 @@ using dft_plan_ptr = std::shared_ptr<const dft_plan<T>>;
template <typename T>
using dft_plan_real_ptr = std::shared_ptr<const dft_plan_real<T>>;
-struct dft_cache
+template <int = 0>
+struct dft_cache_impl
{
- static dft_cache& instance()
+ static dft_cache_impl& instance()
{
- static dft_cache cache;
+ static dft_cache_impl cache;
return cache;
}
dft_plan_ptr<f32> get(ctype_t<f32>, size_t size)
@@ -120,10 +121,12 @@ private:
#endif
};
+using dft_cache = dft_cache_impl<>;
+
template <typename T, size_t Tag>
univector<complex<T>> dft(const univector<complex<T>, Tag>& input)
{
- dft_plan_ptr<T> dft = dft_cache::instance().get(ctype<T>, input.size());
+ dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), input.size());
univector<complex<T>> output(input.size());
univector<u8> temp(dft->temp_size);
dft->execute(output, input, temp);
@@ -133,7 +136,7 @@ univector<complex<T>> dft(const univector<complex<T>, Tag>& input)
template <typename T, size_t Tag>
univector<complex<T>> idft(const univector<complex<T>, Tag>& input)
{
- dft_plan_ptr<T> dft = dft_cache::instance().get(ctype<T>, input.size());
+ dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), input.size());
univector<complex<T>> output(input.size());
univector<u8> temp(dft->temp_size);
dft->execute(output, input, temp, ctrue);
@@ -143,7 +146,7 @@ univector<complex<T>> idft(const univector<complex<T>, Tag>& input)
template <typename T, size_t Tag>
univector<complex<T>> realdft(const univector<T, Tag>& input)
{
- dft_plan_real_ptr<T> dft = dft_cache::instance().getreal(ctype<T>, input.size());
+ dft_plan_real_ptr<T> dft = dft_cache::instance().getreal(ctype_t<T>(), input.size());
univector<complex<T>> output(input.size() / 2 + 1);
univector<u8> temp(dft->temp_size);
dft->execute(output, input, temp);
@@ -153,7 +156,7 @@ univector<complex<T>> realdft(const univector<T, Tag>& input)
template <typename T, size_t Tag>
univector<T> irealdft(const univector<complex<T>, Tag>& input)
{
- dft_plan_real_ptr<T> dft = dft_cache::instance().getreal(ctype<T>, input.size());
+ dft_plan_real_ptr<T> dft = dft_cache::instance().getreal(ctype_t<T>(), input.size());
univector<T> output((input.size() - 1) * 2);
univector<u8> temp(dft->temp_size);
dft->execute(output, input, temp);
diff --git a/include/kfr/dft/conv.hpp b/include/kfr/dft/conv.hpp
@@ -34,9 +34,9 @@
#include "cache.hpp"
#include "fft.hpp"
-#pragma clang diagnostic push
+CMT_PRAGMA_GNU(GCC diagnostic push)
#if CMT_HAS_WARNING("-Wshadow")
-#pragma clang diagnostic ignored "-Wshadow"
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
#endif
namespace kfr
@@ -51,7 +51,7 @@ KFR_INTRIN univector<T> convolve(const univector<T, Tag1>& src1, const univector
src1padded.resize(size, 0);
src2padded.resize(size, 0);
- dft_plan_ptr<T> dft = dft_cache::instance().get(ctype<T>, size);
+ dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), size);
univector<u8> temp(dft->temp_size);
dft->execute(src1padded, src1padded, temp);
dft->execute(src2padded, src2padded, temp);
@@ -69,7 +69,7 @@ KFR_INTRIN univector<T> correlate(const univector<T, Tag1>& src1, const univecto
univector<complex<T>> src2padded = reverse(src2);
src1padded.resize(size, 0);
src2padded.resize(size, 0);
- dft_plan_ptr<T> dft = dft_cache::instance().get(ctype<T>, size);
+ dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), size);
univector<u8> temp(dft->temp_size);
dft->execute(src1padded, src1padded, temp);
dft->execute(src2padded, src2padded, temp);
@@ -87,4 +87,4 @@ KFR_INTRIN univector<T> autocorrelate(const univector<T, Tag1>& src)
return result;
}
}
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -35,11 +35,14 @@
#include "bitrev.hpp"
#include "ft.hpp"
-#pragma clang diagnostic push
+CMT_PRAGMA_GNU(GCC diagnostic push)
#if CMT_HAS_WARNING("-Wshadow")
-#pragma clang diagnostic ignored "-Wshadow"
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
#endif
+CMT_PRAGMA_MSVC(warning(push))
+CMT_PRAGMA_MSVC(warning(disable : 4100))
+
namespace kfr
{
@@ -65,9 +68,9 @@ protected:
virtual void do_execute(complex<T>*, const complex<T>*, u8* temp) = 0;
};
-#pragma clang diagnostic push
+CMT_PRAGMA_GNU(GCC diagnostic push)
#if CMT_HAS_WARNING("-Wassume")
-#pragma clang diagnostic ignored "-Wassume"
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wassume")
#endif
namespace internal
@@ -75,15 +78,17 @@ namespace internal
 template <size_t width, bool inverse, typename T>
 KFR_SINTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, cfalse_t /*split_format*/, cbool_t<inverse>,
- cvec<T, width> w, cvec<T, width> tw)
+ const cvec<T, width>& w, const cvec<T, width>& tw)
 {
- cvec<T, width> b1 = w * dupeven(tw);
- w = swap<2>(w);
+ // Overload selected by the cfalse_t split_format tag. Returns
+ // subadd(w * dupeven(tw), swap<2>(w) * dupodd(tw')), where tw' is tw, negated when
+ // `inverse` is true. NOTE(review): this reads as a complex multiply of w by tw
+ // (conjugated for the inverse transform) - confirm against dupeven/dupodd/subadd.
+ // w and tw are const references, so ww and tw_ are the mutable working copies.
+ cvec<T, width> ww = w;
+ cvec<T, width> tw_ = tw;
+ cvec<T, width> b1 = ww * dupeven(tw_);
+ ww = swap<2>(ww);
 if (inverse)
- tw = -(tw);
- w = subadd(b1, w * dupodd(tw));
- return w;
+ tw_ = -(tw_);
+ // b1 was formed before the negation above, so only the dupodd term sees the negated twiddle.
+ ww = subadd(b1, ww * dupodd(tw_));
+ return ww;
 }
template <size_t width, bool use_br2, bool inverse, bool aligned, typename T>
@@ -107,9 +112,9 @@ KFR_SINTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfals
cwrite<width, aligned>(out, sum02 + sum13);
w2 = sum02 - sum13;
- cwrite<width, aligned>(
- out + N4 * (use_br2 ? 1 : 2),
- radix4_apply_twiddle(csize<width>, cfalse, cbool<inverse>, w2, cread<width, true>(twiddle + width)));
+ cwrite<width, aligned>(out + N4 * (use_br2 ? 1 : 2),
+ radix4_apply_twiddle(csize_t<width>(), cfalse, cbool_t<inverse>(), w2,
+ cread<width, true>(twiddle + width)));
diff02 = a0 - a2;
diff13 = a1 - a3;
if (inverse)
@@ -125,17 +130,17 @@ KFR_SINTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfals
w1 = diff02 + diff13;
- cwrite<width, aligned>(
- out + N4 * (use_br2 ? 2 : 1),
- radix4_apply_twiddle(csize<width>, cfalse, cbool<inverse>, w1, cread<width, true>(twiddle + 0)));
+ cwrite<width, aligned>(out + N4 * (use_br2 ? 2 : 1),
+ radix4_apply_twiddle(csize_t<width>(), cfalse, cbool_t<inverse>(), w1,
+ cread<width, true>(twiddle + 0)));
w3 = diff02 - diff13;
- cwrite<width, aligned>(out + N4 * 3, radix4_apply_twiddle(csize<width>, cfalse, cbool<inverse>, w3,
- cread<width, true>(twiddle + width * 2)));
+ cwrite<width, aligned>(out + N4 * 3, radix4_apply_twiddle(csize_t<width>(), cfalse, cbool_t<inverse>(),
+ w3, cread<width, true>(twiddle + width * 2)));
}
template <size_t width, bool inverse, typename T>
KFR_SINTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_format*/, cbool_t<inverse>,
- cvec<T, width> w, cvec<T, width> tw)
+ const cvec<T, width>& w, const cvec<T, width>& tw)
{
vec<T, width> re1, im1, twre, twim;
split(w, re1, im1);
@@ -144,10 +149,9 @@ KFR_SINTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_
const vec<T, width> b1re = re1 * twre;
const vec<T, width> b1im = im1 * twre;
if (inverse)
- w = concat(b1re + im1 * twim, b1im - re1 * twim);
+ return concat(b1re + im1 * twim, b1im - re1 * twim);
else
- w = concat(b1re - im1 * twim, b1im + re1 * twim);
- return w;
+ return concat(b1re - im1 * twim, b1im + re1 * twim);
}
template <size_t width, bool splitout, bool splitin, bool use_br2, bool inverse, bool aligned, typename T>
@@ -175,8 +179,8 @@ KFR_SINTRIN void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splitout
cwrite_split<width, aligned, write_split>(out, concat(sum02re + sum13re, sum02im + sum13im));
w2 = concat(sum02re - sum13re, sum02im - sum13im);
cwrite_split<width, aligned, write_split>(
- out + N4 * (use_br2 ? 1 : 2),
- radix4_apply_twiddle(csize<width>, ctrue, cbool<inverse>, w2, cread<width, true>(twiddle + width)));
+ out + N4 * (use_br2 ? 1 : 2), radix4_apply_twiddle(csize_t<width>(), ctrue, cbool_t<inverse>(), w2,
+ cread<width, true>(twiddle + width)));
const vec<T, width> diff02re = re0 - re2;
const vec<T, width> diff02im = im0 - im2;
@@ -187,11 +191,11 @@ KFR_SINTRIN void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splitout
(inverse ? w3 : w1) = concat(diff02re + diff13im, diff02im - diff13re);
cwrite_split<width, aligned, write_split>(
- out + N4 * (use_br2 ? 2 : 1),
- radix4_apply_twiddle(csize<width>, ctrue, cbool<inverse>, w1, cread<width, true>(twiddle + 0)));
- cwrite_split<width, aligned, write_split>(out + N4 * 3,
- radix4_apply_twiddle(csize<width>, ctrue, cbool<inverse>, w3,
- cread<width, true>(twiddle + width * 2)));
+ out + N4 * (use_br2 ? 2 : 1), radix4_apply_twiddle(csize_t<width>(), ctrue, cbool_t<inverse>(), w1,
+ cread<width, true>(twiddle + 0)));
+ cwrite_split<width, aligned, write_split>(
+ out + N4 * 3, radix4_apply_twiddle(csize_t<width>(), ctrue, cbool_t<inverse>(), w3,
+ cread<width, true>(twiddle + width * 2)));
}
template <typename T>
@@ -254,30 +258,29 @@ CMT_NOINLINE void initialize_twiddles(complex<T>*& twiddle, size_t stage_size, s
}
}
-template <typename T>
-KFR_SINTRIN void prefetch_one(const complex<T>* in)
-{
+// KFR_PREFETCH(addr) issues a prefetch hint for addr. It expands to a single expression
+// without a trailing semicolon: the caller writes the ';', so the macro behaves like a
+// function call and stays safe inside an unbraced if/else.
 #ifdef CMT_ARCH_X86
- __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0);
+#ifdef CMT_COMPILER_GNU
+#define KFR_PREFETCH(addr) __builtin_prefetch(::kfr::ptr_cast<void>(addr), 0, _MM_HINT_T0)
+#else
+#define KFR_PREFETCH(addr) _mm_prefetch(::kfr::ptr_cast<char>(addr), _MM_HINT_T0)
+#endif
 #else
- __builtin_prefetch(ptr_cast<void>(in));
+// NOTE(review): this branch assumes the compiler provides __builtin_prefetch on non-x86 targets.
+#define KFR_PREFETCH(addr) __builtin_prefetch(::kfr::ptr_cast<void>(addr))
 #endif
+
+// Prefetches the memory at `in`.
+template <typename T>
+KFR_SINTRIN void prefetch_one(const complex<T>* in)
+{
+ KFR_PREFETCH(in);
 }
 template <typename T>
 KFR_SINTRIN void prefetch_four(size_t stride, const complex<T>* in)
 {
-#ifdef CMT_ARCH_X86
- __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0);
- __builtin_prefetch(ptr_cast<void>(in + stride), 0, _MM_HINT_T0);
- __builtin_prefetch(ptr_cast<void>(in + stride * 2), 0, _MM_HINT_T0);
- __builtin_prefetch(ptr_cast<void>(in + stride * 3), 0, _MM_HINT_T0);
-#else
- __builtin_prefetch(ptr_cast<void>(in));
- __builtin_prefetch(ptr_cast<void>(in + stride));
- __builtin_prefetch(ptr_cast<void>(in + stride * 2));
- __builtin_prefetch(ptr_cast<void>(in + stride * 3));
-#endif
+ // Prefetches four addresses spaced `stride` elements apart, starting at `in`.
+ KFR_PREFETCH(in);
+ KFR_PREFETCH(in + stride);
+ KFR_PREFETCH(in + stride * 2);
+ KFR_PREFETCH(in + stride * 3);
 }
template <typename Ntype, size_t width, bool splitout, bool splitin, bool prefetch, bool use_br2,
@@ -287,20 +290,21 @@ KFR_SINTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t
complex<T>* out, const complex<T>* in, const complex<T>*& twiddle)
{
constexpr static size_t prefetch_offset = width * 8;
- const auto N4 = N / csize<4>;
- const auto N43 = N4 * csize<3>;
+ const auto N4 = N / csize_t<4>();
+ const auto N43 = N4 * csize_t<3>();
CMT_ASSUME(blocks > 0);
CMT_ASSUME(N > 0);
CMT_ASSUME(N4 > 0);
CMT_LOOP_NOUNROLL for (size_t b = 0; b < blocks; b++)
{
-#pragma clang loop unroll_count(2)
+ CMT_PRAGMA_CLANG(clang loop unroll_count(2))
for (size_t n2 = 0; n2 < N4; n2 += width)
{
if (prefetch)
prefetch_four(N4, in + prefetch_offset);
- radix4_body(N, csize<width>, cbool < splitout || splitin >, cbool<splitout>, cbool<splitin>,
- cbool<use_br2>, cbool<inverse>, cbool<aligned>, out, in, twiddle + n2 * 3);
+ radix4_body(N, csize_t<width>(), cbool_t<(splitout || splitin)>(), cbool_t<splitout>(),
+ cbool_t<splitin>(), cbool_t<use_br2>(), cbool_t<inverse>(), cbool_t<aligned>(), out,
+ in, twiddle + n2 * 3);
in += width;
out += width;
}
@@ -321,7 +325,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<32>, size_t blocks, csize_t<width>, cfal
for (size_t b = 0; b < blocks; b++)
{
if (prefetch)
- prefetch_four(csize<64>, out + prefetch_offset);
+ prefetch_four(csize_t<64>(), out + prefetch_offset);
cvec<T, 4> w0, w1, w2, w3, w4, w5, w6, w7;
split(cread<8, aligned>(out + 0), w0, w1);
split(cread<8, aligned>(out + 8), w2, w3);
@@ -330,13 +334,13 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<32>, size_t blocks, csize_t<width>, cfal
butterfly8<4, inverse>(w0, w1, w2, w3, w4, w5, w6, w7);
- w1 = cmul(w1, fixed_twiddle<T, 4, 32, 0, 1, inverse>);
- w2 = cmul(w2, fixed_twiddle<T, 4, 32, 0, 2, inverse>);
- w3 = cmul(w3, fixed_twiddle<T, 4, 32, 0, 3, inverse>);
- w4 = cmul(w4, fixed_twiddle<T, 4, 32, 0, 4, inverse>);
- w5 = cmul(w5, fixed_twiddle<T, 4, 32, 0, 5, inverse>);
- w6 = cmul(w6, fixed_twiddle<T, 4, 32, 0, 6, inverse>);
- w7 = cmul(w7, fixed_twiddle<T, 4, 32, 0, 7, inverse>);
+ w1 = cmul(w1, fixed_twiddle<T, 4, 32, 0, 1, inverse>());
+ w2 = cmul(w2, fixed_twiddle<T, 4, 32, 0, 2, inverse>());
+ w3 = cmul(w3, fixed_twiddle<T, 4, 32, 0, 3, inverse>());
+ w4 = cmul(w4, fixed_twiddle<T, 4, 32, 0, 4, inverse>());
+ w5 = cmul(w5, fixed_twiddle<T, 4, 32, 0, 5, inverse>());
+ w6 = cmul(w6, fixed_twiddle<T, 4, 32, 0, 6, inverse>());
+ w7 = cmul(w7, fixed_twiddle<T, 4, 32, 0, 7, inverse>());
cvec<T, 8> z0, z1, z2, z3;
transpose4x8(w0, w1, w2, w3, w4, w5, w6, w7, z0, z1, z2, z3);
@@ -380,7 +384,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<16>, size_t blocks, csize_t<width>, cfal
{
CMT_ASSUME(blocks > 0);
constexpr static size_t prefetch_offset = width * 4;
-#pragma clang loop unroll_count(2)
+ CMT_PRAGMA_CLANG(clang loop unroll_count(2))
for (size_t b = 0; b < blocks; b += 2)
{
if (prefetch)
@@ -434,13 +438,14 @@ struct fft_stage_impl : dft_stage<T>
this->stage_size = stage_size;
this->repeats = 4;
this->recursion = true;
- this->data_size = align_up(sizeof(complex<T>) * stage_size / 4 * 3, native_cache_alignment);
+ this->data_size =
+ align_up(sizeof(complex<T>) * stage_size / 4 * 3, platform<>::native_cache_alignment);
}
protected:
constexpr static bool prefetch = true;
constexpr static bool aligned = false;
- constexpr static size_t width = vector_width<T, cpu_t::native>;
+ constexpr static size_t width = platform<T>::vector_width;
virtual void do_initialize(size_t size) override final
{
@@ -452,12 +457,12 @@ protected:
{
const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
if (splitin)
- in = out;
- const size_t stage_size = this->stage_size;
- CMT_ASSUME(stage_size >= 2048);
- CMT_ASSUME(stage_size % 2048 == 0);
- radix4_pass(stage_size, 1, csize<width>, ctrue, cbool<splitin>, cbool<!is_even>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, in, twiddle);
+ in = out;
+ const size_t stg_size = this->stage_size;
+ CMT_ASSUME(stg_size >= 2048);
+ CMT_ASSUME(stg_size % 2048 == 0);
+ radix4_pass(stg_size, 1, csize_t<width>(), ctrue, cbool_t<splitin>(), cbool_t<!is_even>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, in, twiddle);
}
};
@@ -470,11 +475,11 @@ struct fft_final_stage_impl : dft_stage<T>
this->out_offset = size;
this->repeats = 4;
this->recursion = true;
- this->data_size = align_up(sizeof(complex<T>) * size * 3 / 2, native_cache_alignment);
+ this->data_size = align_up(sizeof(complex<T>) * size * 3 / 2, platform<>::native_cache_alignment);
}
protected:
- constexpr static size_t width = vector_width<T, cpu_t::native>;
+ constexpr static size_t width = platform<T>::vector_width;
constexpr static bool is_even = cometa::is_even(ilog2(size));
constexpr static bool use_br2 = !is_even;
constexpr static bool aligned = false;
@@ -483,11 +488,11 @@ protected:
 virtual void do_initialize(size_t total_size) override final
 {
 complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
- size_t stage_size = this->stage_size;
- while (stage_size > 4)
+ // Calls initialize_twiddles once per stage size, starting at this->stage_size and
+ // dividing by 4 until the size is no longer above 4. `twiddle` is passed by reference;
+ // presumably each call advances it past the entries it wrote - confirm in initialize_twiddles.
+ // The local is named stg_size, presumably to avoid shadowing the stage_size member.
+ size_t stg_size = this->stage_size;
+ while (stg_size > 4)
 {
- initialize_twiddles<T, width>(twiddle, stage_size, total_size, true);
- stage_size /= 4;
+ initialize_twiddles<T, width>(twiddle, stg_size, total_size, true);
+ stg_size /= 4;
 }
 }
@@ -496,55 +501,55 @@ protected:
constexpr bool is_double = sizeof(T) == 8;
constexpr size_t final_size = is_even ? (is_double ? 4 : 16) : (is_double ? 8 : 32);
const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
- final_pass(csize<final_size>, out, in, twiddle);
+ final_pass(csize_t<final_size>(), out, in, twiddle);
}
 KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
 {
- radix4_pass(512, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(128, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(32, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<8>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ // Pass sizes 512 -> 128 -> 32 -> 8 with block counts 1 -> 4 -> 16 -> 64. Only the first
+ // pass reads `in`; the later passes use `out` as both source and destination. The last
+ // pass supplies its size as the compile-time constant csize_t<8>.
+ // NOTE(review): radix4_pass takes `twiddle` by reference and presumably advances it
+ // from pass to pass - confirm.
+ radix4_pass(512, 1, csize_t<width>(), ctrue, cbool_t<splitin>(), cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, in, twiddle);
+ radix4_pass(128, 4, csize_t<width>(), ctrue, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(32, 16, csize_t<width>(), cfalse, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(csize_t<8>(), 64, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
 }
 KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
 {
- radix4_pass(512, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(128, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<32>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ // Pass sizes 512 -> 128 -> 32 with block counts 1 -> 4 -> 16. Only the first pass reads
+ // `in`; the later passes use `out` as both source and destination. The last pass
+ // supplies its size as the compile-time constant csize_t<32>.
+ radix4_pass(512, 1, csize_t<width>(), ctrue, cbool_t<splitin>(), cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, in, twiddle);
+ radix4_pass(128, 4, csize_t<width>(), cfalse, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(csize_t<32>(), 16, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
 }
 KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
 {
- radix4_pass(1024, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(256, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(64, 16, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(16, 64, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<4>, 256, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ // Pass sizes 1024 -> 256 -> 64 -> 16 -> 4 with block counts 1 -> 4 -> 16 -> 64 -> 256.
+ // Only the first pass reads `in`; the later passes use `out` as both source and
+ // destination. The last pass supplies its size as the compile-time constant csize_t<4>.
+ radix4_pass(1024, 1, csize_t<width>(), ctrue, cbool_t<splitin>(), cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, in, twiddle);
+ radix4_pass(256, 4, csize_t<width>(), ctrue, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(64, 16, csize_t<width>(), ctrue, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(16, 64, csize_t<width>(), cfalse, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(csize_t<4>(), 256, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
 }
 KFR_INTRIN void final_pass(csize_t<16>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
 {
- radix4_pass(1024, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(256, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(64, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<16>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ // Pass sizes 1024 -> 256 -> 64 -> 16 with block counts 1 -> 4 -> 16 -> 64. Only the
+ // first pass reads `in`; the later passes use `out` as both source and destination.
+ // The last pass supplies its size as the compile-time constant csize_t<16>.
+ radix4_pass(1024, 1, csize_t<width>(), ctrue, cbool_t<splitin>(), cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, in, twiddle);
+ radix4_pass(256, 4, csize_t<width>(), ctrue, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(64, 16, csize_t<width>(), cfalse, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(csize_t<16>(), 64, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
 }
};
@@ -565,7 +570,7 @@ protected:
 virtual void do_execute(complex<T>* out, const complex<T>*, u8* /*temp*/) override final
 {
- fft_reorder(out, log2n, cbool<!is_even>);
+ // Reorders `out` through fft_reorder with log2n; the tag is cbool_t<!is_even>.
+ // The input pointer and the temp buffer are unnamed and not used here.
+ fft_reorder(out, log2n, cbool_t<!is_even>());
 }
};
@@ -596,7 +601,7 @@ protected:
{
cvec<T, 1> a0, a1, a2, a3;
split(cread<4>(in), a0, a1, a2, a3);
- butterfly(cbool<inverse>, a0, a1, a2, a3, a0, a1, a2, a3);
+ butterfly(cbool_t<inverse>(), a0, a1, a2, a3, a0, a1, a2, a3);
cwrite<4>(out, concat(a0, a1, a2, a3));
}
};
@@ -651,7 +656,7 @@ protected:
constexpr static bool aligned = false;
 virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
 {
- butterfly64(cbool<inverse>, cbool<aligned>, out, in);
+ // Delegates to butterfly64 with the `inverse` and `aligned` flags passed as
+ // cbool_t tag objects; the temp buffer argument is not used.
+ butterfly64(cbool_t<inverse>(), cbool_t<aligned>(), out, in);
 }
};
@@ -661,12 +666,12 @@ struct fft_specialization<T, 7, inverse> : dft_stage<T>
 fft_specialization(size_t)
 {
 this->stage_size = 128;
- this->data_size = align_up(sizeof(complex<T>) * 128 * 3 / 2, native_cache_alignment);
+ // Room for 128 * 3 / 2 complex<T> values, rounded up by align_up to
+ // platform<>::native_cache_alignment. The size argument is ignored.
+ this->data_size = align_up(sizeof(complex<T>) * 128 * 3 / 2, platform<>::native_cache_alignment);
 }
protected:
constexpr static bool aligned = false;
- constexpr static size_t width = vector_width<T, cpu_t::native>;
+ constexpr static size_t width = platform<T>::vector_width;
constexpr static bool use_br2 = true;
constexpr static bool prefetch = false;
constexpr static bool is_double = sizeof(T) == 8;
@@ -684,26 +689,26 @@ protected:
 virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
 {
 const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
- final_pass(csize<final_size>, out, in, twiddle);
- fft_reorder(out, csize<7>);
+ // Runs the final_pass overload chosen by the final_size tag, with this->data
+ // reinterpreted as the twiddle array, then reorders `out` via fft_reorder(csize_t<7>).
+ final_pass(csize_t<final_size>(), out, in, twiddle);
+ fft_reorder(out, csize_t<7>());
 }
 KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
 {
- radix4_pass(128, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, in, twiddle);
- radix4_pass(32, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<8>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ // Pass sizes 128 -> 32 -> 8 with block counts 1 -> 4 -> 16. Only the first pass reads
+ // `in`; the later passes use `out` as both source and destination. The last pass
+ // supplies its size as the compile-time constant csize_t<8>.
+ radix4_pass(128, 1, csize_t<width>(), ctrue, cfalse, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, in, twiddle);
+ radix4_pass(32, 4, csize_t<width>(), cfalse, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(csize_t<8>(), 16, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
 }
 KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
 {
- radix4_pass(128, 1, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, in, twiddle);
- radix4_pass(csize<32>, 4, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ // Two passes: size 128 with one block reading `in`, then the compile-time
+ // csize_t<32> pass over four blocks with `out` as both source and destination.
+ radix4_pass(128, 1, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, in, twiddle);
+ radix4_pass(csize_t<32>(), 4, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
 }
};
@@ -749,12 +754,12 @@ struct fft_specialization<double, 8, inverse> : dft_stage<double>
 fft_specialization(size_t)
 {
 this->stage_size = 256;
- this->data_size = align_up(sizeof(complex<T>) * 256 * 3 / 2, native_cache_alignment);
+ // Room for 256 * 3 / 2 complex<T> values, rounded up by align_up to
+ // platform<>::native_cache_alignment. The size argument is ignored.
+ this->data_size = align_up(sizeof(complex<T>) * 256 * 3 / 2, platform<>::native_cache_alignment);
 }
protected:
constexpr static bool aligned = false;
- constexpr static size_t width = vector_width<T, cpu_t::native>;
+ constexpr static size_t width = platform<T>::vector_width;
constexpr static bool use_br2 = false;
constexpr static bool prefetch = false;
constexpr static size_t split_format = true;
@@ -770,20 +775,20 @@ protected:
 virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
 {
 const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
- final_pass(csize<4>, out, in, twiddle);
- fft_reorder(out, csize<8>);
+ // Runs the csize_t<4> final_pass with this->data reinterpreted as the twiddle
+ // array, then reorders `out` via fft_reorder(csize_t<8>).
+ final_pass(csize_t<4>(), out, in, twiddle);
+ fft_reorder(out, csize_t<8>());
 }
 KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
 {
- radix4_pass(256, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, in, twiddle);
- radix4_pass(64, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(16, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
- cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<4>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
- cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ // Pass sizes 256 -> 64 -> 16 -> 4 with block counts 1 -> 4 -> 16 -> 64. Only the first
+ // pass reads `in`; the later passes use `out` as both source and destination. The last
+ // pass supplies its size as the compile-time constant csize_t<4>.
+ radix4_pass(256, 1, csize_t<width>(), ctrue, cfalse, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, in, twiddle);
+ radix4_pass(64, 4, csize_t<width>(), ctrue, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(16, 16, csize_t<width>(), cfalse, ctrue, cbool_t<use_br2>(), cbool_t<prefetch>(),
+ cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
+ radix4_pass(csize_t<4>(), 64, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(),
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
 }
};
@@ -835,15 +840,16 @@ struct dft_plan
if (is_poweroftwo(size))
{
const size_t log2n = ilog2(size);
- cswitch(csizes<1, 2, 3, 4, 5, 6, 7, 8>, log2n,
+ cswitch(csizes_t<1, 2, 3, 4, 5, 6, 7, 8>(), log2n,
[&](auto log2n) {
- add_stage<internal::fft_specialization_t<T, val_of(decltype(log2n)()),
- false>::template type>(size, type);
+ (void)log2n;
+ this->add_stage<internal::fft_specialization_t<T, val_of(decltype(log2n)()),
+ false>::template type>(size, type);
},
[&]() {
cswitch(cfalse_true, is_even(log2n), [&](auto is_even) {
- make_fft(size, type, is_even, ctrue);
- add_stage<internal::fft_reorder_stage_impl_t<
+ this->make_fft(size, type, is_even, ctrue);
+ this->add_stage<internal::fft_reorder_stage_impl_t<
T, val_of(decltype(is_even)())>::template type>(size, type);
});
});
@@ -927,7 +933,7 @@ protected:
{
add_stage<fft_stage_impl_t::template type>(stage_size, type);
- make_fft(stage_size / 4, cbools<direct, inverse>, cbool<is_even>, cfalse);
+ make_fft(stage_size / 4, cbools_t<direct, inverse>(), cbool_t<is_even>(), cfalse);
}
else
{
@@ -1021,15 +1027,13 @@ struct dft_plan_real : dft_plan<T>
: dft_plan<T>(size / 2, type), rtwiddle(size / 4)
{
using namespace internal;
- const size_t csize = size / 2;
- const size_t count = size / 4;
- constexpr size_t width = vector_width<T> * 2;
+ constexpr size_t width = platform<T>::vector_width * 2;
- block_process(count, csizes<width, 1>, [=](size_t i, auto w) {
+ block_process(size / 4, csizes_t<width, 1>(), [=](size_t i, auto w) {
constexpr size_t width = val_of(decltype(w)());
cwrite<width>(rtwiddle.data() + i,
- cossin(dup(-c_pi<T> * ((enumerate<T, width>() + i + count) / csize))));
+ cossin(dup(-c_pi<T> * ((enumerate<T, width>() + i + size / 4) / (size / 2)))));
});
}
@@ -1069,13 +1073,13 @@ private:
void to_fmt(complex<T>* out, dft_pack_format fmt) const
{
using namespace internal;
- const size_t csize = this->size; // / 2;
+ size_t csize = this->size; // const size_t causes internal compiler error: in tsubst_copy in GCC 5.2
- constexpr size_t width = vector_width<T> * 2;
+ constexpr size_t width = platform<T>::vector_width * 2;
const cvec<T, 1> dc = cread<1>(out);
const size_t count = csize / 2;
- block_process(count, csizes<width, 1>, [=](size_t i, auto w) {
+ block_process(count, csizes_t<width, 1>(), [=](size_t i, auto w) {
constexpr size_t width = val_of(decltype(w)());
constexpr size_t widthm1 = width - 1;
const cvec<T, width> tw = cread<width>(rtwiddle.data() + i);
@@ -1122,10 +1126,10 @@ private:
dc = pack(in[0].real() + in[0].imag(), in[0].real() - in[0].imag());
}
- constexpr size_t width = vector_width<T> * 2;
+ constexpr size_t width = platform<T>::vector_width * 2;
const size_t count = csize / 2;
- block_process(count, csizes<width, 1>, [=](size_t i, auto w) {
+ block_process(count, csizes_t<width, 1>(), [=](size_t i, auto w) {
constexpr size_t width = val_of(decltype(w)());
constexpr size_t widthm1 = width - 1;
const cvec<T, width> tw = cread<width>(rtwiddle.data() + i);
@@ -1150,4 +1154,6 @@ private:
};
}
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
+
+CMT_PRAGMA_MSVC(warning(pop))
diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp
@@ -37,6 +37,9 @@
#include "../base/memory.hpp"
#include "../data/sincos.hpp"
+CMT_PRAGMA_MSVC(warning(push))
+CMT_PRAGMA_MSVC(warning(disable : 4127))
+
namespace kfr
{
@@ -44,18 +47,18 @@ namespace internal
{
template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
-CMT_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, N> y)
+CMT_INLINE vec<T, N> cmul_impl(const vec<T, N>& x, const vec<T, N>& y)
{
return subadd(x * dupeven(y), swap<2>(x) * dupodd(y));
}
template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
-CMT_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, 2> y)
+CMT_INLINE vec<T, N> cmul_impl(const vec<T, N>& x, const vec<T, 2>& y)
{
vec<T, N> yy = resize<N>(y);
return cmul_impl(x, yy);
}
template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
-CMT_INLINE vec<T, N> cmul_impl(vec<T, 2> x, vec<T, N> y)
+CMT_INLINE vec<T, N> cmul_impl(const vec<T, 2>& x, const vec<T, N>& y)
{
vec<T, N> xx = resize<N>(x);
return cmul_impl(xx, y);
@@ -63,23 +66,24 @@ CMT_INLINE vec<T, N> cmul_impl(vec<T, 2> x, vec<T, N> y)
/// Complex Multiplication
template <typename T, size_t N1, size_t N2>
-CMT_INLINE vec<T, const_max(N1, N2)> cmul(vec<T, N1> x, vec<T, N2> y)
+CMT_INLINE vec<T, const_max(N1, N2)> cmul(const vec<T, N1>& x, const vec<T, N2>& y)
{
return internal::cmul_impl(x, y);
}
template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
-CMT_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, N> y)
+CMT_INLINE vec<T, N> cmul_conj(const vec<T, N>& x, const vec<T, N>& y)
{
return swap<2>(subadd(swap<2>(x) * dupeven(y), x * dupodd(y)));
}
template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
-CMT_INLINE vec<T, N> cmul_2conj(vec<T, N> in0, vec<T, N> in1, vec<T, N> tw)
+CMT_INLINE vec<T, N> cmul_2conj(const vec<T, N>& in0, const vec<T, N>& in1, const vec<T, N>& tw)
{
return (in0 + in1) * dupeven(tw) + swap<2>(cnegimag(in0 - in1)) * dupodd(tw);
}
template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
-CMT_INLINE void cmul_2conj(vec<T, N>& out0, vec<T, N>& out1, vec<T, 2> in0, vec<T, 2> in1, vec<T, N> tw)
+CMT_INLINE void cmul_2conj(vec<T, N>& out0, vec<T, N>& out1, const vec<T, 2>& in0, const vec<T, 2>& in1,
+ const vec<T, N>& tw)
{
const vec<T, N> twr = dupeven(tw);
const vec<T, N> twi = dupodd(tw);
@@ -91,13 +95,13 @@ CMT_INLINE void cmul_2conj(vec<T, N>& out0, vec<T, N>& out1, vec<T, 2> in0, vec<
out1 += sumtw - diftw;
}
template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
-CMT_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, 2> y)
+CMT_INLINE vec<T, N> cmul_conj(const vec<T, N>& x, const vec<T, 2>& y)
{
vec<T, N> yy = resize<N>(y);
return cmul_conj(x, yy);
}
template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
-CMT_INLINE vec<T, N> cmul_conj(vec<T, 2> x, vec<T, N> y)
+CMT_INLINE vec<T, N> cmul_conj(const vec<T, 2>& x, const vec<T, N>& y)
{
vec<T, N> xx = resize<N>(x);
return cmul_conj(xx, y);
@@ -113,7 +117,7 @@ CMT_INLINE cvec<T, N> cread(const complex<T>* src)
}
template <size_t N, bool A = false, typename T>
-CMT_INLINE void cwrite(complex<T>* dest, cvec<T, N> value)
+CMT_INLINE void cwrite(complex<T>* dest, const cvec<T, N>& value)
{
return simd_write<A, N * 2>(ptr_cast<T>(dest), *value);
}
@@ -124,7 +128,7 @@ CMT_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, csizes_t<i
return concat(read<N * 2, A>(ptr_cast<T>(src + stride * indices))...);
}
template <size_t count, size_t N, size_t stride, bool A, typename T, size_t... indices>
-CMT_INLINE void cwrite_group_impl(complex<T>* dest, cvec<T, count * N> value, csizes_t<indices...>)
+CMT_INLINE void cwrite_group_impl(complex<T>* dest, const cvec<T, count * N>& value, csizes_t<indices...>)
{
swallow{ (write<A>(ptr_cast<T>(dest + stride * indices), slice<indices * N * 2, N * 2>(value)), 0)... };
}
@@ -135,7 +139,7 @@ CMT_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, size_t str
return concat(read<N * 2, A>(ptr_cast<T>(src + stride * indices))...);
}
template <size_t count, size_t N, bool A, typename T, size_t... indices>
-CMT_INLINE void cwrite_group_impl(complex<T>* dest, size_t stride, cvec<T, count * N> value,
+CMT_INLINE void cwrite_group_impl(complex<T>* dest, size_t stride, const cvec<T, count * N>& value,
csizes_t<indices...>)
{
swallow{ (write<A>(ptr_cast<T>(dest + stride * indices), slice<indices * N * 2, N * 2>(value)), 0)... };
@@ -144,25 +148,25 @@ CMT_INLINE void cwrite_group_impl(complex<T>* dest, size_t stride, cvec<T, count
template <size_t count, size_t N, size_t stride, bool A = false, typename T>
CMT_INLINE cvec<T, count * N> cread_group(const complex<T>* src)
{
- return cread_group_impl<count, N, stride, A>(src, csizeseq<count>);
+ return cread_group_impl<count, N, stride, A>(src, csizeseq_t<count>());
}
template <size_t count, size_t N, size_t stride, bool A = false, typename T>
-CMT_INLINE void cwrite_group(complex<T>* dest, cvec<T, count * N> value)
+CMT_INLINE void cwrite_group(complex<T>* dest, const cvec<T, count * N>& value)
{
- return cwrite_group_impl<count, N, stride, A>(dest, value, csizeseq<count>);
+ return cwrite_group_impl<count, N, stride, A>(dest, value, csizeseq_t<count>());
}
template <size_t count, size_t N, bool A = false, typename T>
CMT_INLINE cvec<T, count * N> cread_group(const complex<T>* src, size_t stride)
{
- return cread_group_impl<count, N, A>(src, stride, csizeseq<count>);
+ return cread_group_impl<count, N, A>(src, stride, csizeseq_t<count>());
}
template <size_t count, size_t N, bool A = false, typename T>
-CMT_INLINE void cwrite_group(complex<T>* dest, size_t stride, cvec<T, count * N> value)
+CMT_INLINE void cwrite_group(complex<T>* dest, size_t stride, const cvec<T, count * N>& value)
{
- return cwrite_group_impl<count, N, A>(dest, stride, value, csizeseq<count>);
+ return cwrite_group_impl<count, N, A>(dest, stride, value, csizeseq_t<count>());
}
template <size_t N, bool A = false, bool split = false, typename T>
@@ -175,11 +179,12 @@ CMT_INLINE cvec<T, N> cread_split(const complex<T>* src)
}
template <size_t N, bool A = false, bool split = false, typename T>
-CMT_INLINE void cwrite_split(complex<T>* dest, cvec<T, N> value)
+CMT_INLINE void cwrite_split(complex<T>* dest, const cvec<T, N>& value)
{
+ cvec<T, N> v = value;
if (split)
- value = interleavehalfs(value);
- simd_write<A, N * 2>(ptr_cast<T>(dest), *value);
+ v = interleavehalfs(v);
+ simd_write<A, N * 2>(ptr_cast<T>(dest), *v);
}
template <>
@@ -209,47 +214,51 @@ inline cvec<f64, 4> cread_split<4, false, true, f64>(const complex<f64>* src)
}
template <>
-inline void cwrite_split<8, false, true, f32>(complex<f32>* dest, cvec<f32, 8> x)
+inline void cwrite_split<8, false, true, f32>(complex<f32>* dest, const cvec<f32, 8>& x)
{
- x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
+ const cvec<f32, 8> xx =
+ concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
cvec<f32, 2> a, b, c, d;
- split(x, a, b, c, d);
+ split(xx, a, b, c, d);
cwrite<2>(dest, a);
cwrite<2>(dest + 4, b);
cwrite<2>(dest + 2, c);
cwrite<2>(dest + 6, d);
}
template <>
-inline void cwrite_split<8, true, true, f32>(complex<f32>* dest, cvec<f32, 8> x)
+inline void cwrite_split<8, true, true, f32>(complex<f32>* dest, const cvec<f32, 8>& x)
{
- x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
+ const cvec<f32, 8> xx =
+ concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
cvec<f32, 2> a, b, c, d;
- split(x, a, b, c, d);
- cwrite<2, true>(dest, a);
+ split(xx, a, b, c, d);
+ cwrite<2, true>(dest + 0, a);
cwrite<2, true>(dest + 4, b);
cwrite<2, true>(dest + 2, c);
cwrite<2, true>(dest + 6, d);
}
template <>
-inline void cwrite_split<4, false, true, f64>(complex<f64>* dest, cvec<f64, 4> x)
+inline void cwrite_split<4, false, true, f64>(complex<f64>* dest, const cvec<f64, 4>& x)
{
- x = concat(shuffle<0, 4, 2, 6>(low(x), high(x)), shuffle<1, 5, 3, 7>(low(x), high(x)));
- cwrite<1>(dest, part<4, 0>(x));
- cwrite<1>(dest + 2, part<4, 1>(x));
- cwrite<1>(dest + 1, part<4, 2>(x));
- cwrite<1>(dest + 3, part<4, 3>(x));
+ const cvec<f64, 4> xx =
+ concat(shuffle<0, 4, 2, 6>(low(x), high(x)), shuffle<1, 5, 3, 7>(low(x), high(x)));
+ cwrite<1>(dest, part<4, 0>(xx));
+ cwrite<1>(dest + 2, part<4, 1>(xx));
+ cwrite<1>(dest + 1, part<4, 2>(xx));
+ cwrite<1>(dest + 3, part<4, 3>(xx));
}
template <>
-inline void cwrite_split<4, true, true, f64>(complex<f64>* dest, cvec<f64, 4> x)
+inline void cwrite_split<4, true, true, f64>(complex<f64>* dest, const cvec<f64, 4>& x)
{
- x = concat(shuffle<0, 4, 2, 6>(low(x), high(x)), shuffle<1, 5, 3, 7>(low(x), high(x)));
- cwrite<1, true>(dest, part<4, 0>(x));
- cwrite<1, true>(dest + 2, part<4, 1>(x));
- cwrite<1, true>(dest + 1, part<4, 2>(x));
- cwrite<1, true>(dest + 3, part<4, 3>(x));
+ const cvec<f64, 4> xx =
+ concat(shuffle<0, 4, 2, 6>(low(x), high(x)), shuffle<1, 5, 3, 7>(low(x), high(x)));
+ cwrite<1, true>(dest + 0, part<4, 0>(xx));
+ cwrite<1, true>(dest + 2, part<4, 1>(xx));
+ cwrite<1, true>(dest + 1, part<4, 2>(xx));
+ cwrite<1, true>(dest + 3, part<4, 3>(xx));
}
template <size_t N, size_t stride, typename T, size_t... Indices>
@@ -266,7 +275,7 @@ CMT_INLINE cvec<T, N> cgather(const complex<T>* base)
return ref_cast<cvec<T, N>>(*base);
}
else
- return cgather_helper<N, stride, T>(base, csizeseq<N>);
+ return cgather_helper<N, stride, T>(base, csizeseq_t<N>());
}
CMT_INLINE size_t cgather_next(size_t& index, size_t stride, size_t size, size_t)
@@ -294,13 +303,13 @@ CMT_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size
template <size_t N, typename T>
CMT_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride)
{
- return cgather_helper<N, T>(base, index, stride, csizeseq<N>);
+ return cgather_helper<N, T>(base, index, stride, csizeseq_t<N>());
}
template <size_t N, typename T>
CMT_INLINE cvec<T, N> cgather(const complex<T>* base, size_t stride)
{
size_t index = 0;
- return cgather_helper<N, T>(base, index, stride, csizeseq<N>);
+ return cgather_helper<N, T>(base, index, stride, csizeseq_t<N>());
}
template <size_t N, typename T, size_t... Indices>
@@ -313,17 +322,17 @@ CMT_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size
template <size_t N, typename T>
CMT_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride, size_t size)
{
- return cgather_helper<N, T>(base, index, stride, size, csizeseq<N>);
+ return cgather_helper<N, T>(base, index, stride, size, csizeseq_t<N>());
}
template <size_t N, size_t stride, typename T, size_t... Indices>
-CMT_INLINE void cscatter_helper(complex<T>* base, cvec<T, N> value, csizes_t<Indices...>)
+CMT_INLINE void cscatter_helper(complex<T>* base, const cvec<T, N>& value, csizes_t<Indices...>)
{
swallow{ (cwrite<1>(base + Indices * stride, slice<Indices * 2, 2>(value)), 0)... };
}
template <size_t N, size_t stride, typename T>
-CMT_INLINE void cscatter(complex<T>* base, cvec<T, N> value)
+CMT_INLINE void cscatter(complex<T>* base, const cvec<T, N>& value)
{
if (stride == 1)
{
@@ -331,38 +340,39 @@ CMT_INLINE void cscatter(complex<T>* base, cvec<T, N> value)
}
else
{
- return cscatter_helper<N, stride, T>(base, value, csizeseq<N>);
+ return cscatter_helper<N, stride, T>(base, value, csizeseq_t<N>());
}
}
template <size_t N, typename T, size_t... Indices>
-CMT_INLINE void cscatter_helper(complex<T>* base, size_t stride, cvec<T, N> value, csizes_t<Indices...>)
+CMT_INLINE void cscatter_helper(complex<T>* base, size_t stride, const cvec<T, N>& value,
+ csizes_t<Indices...>)
{
swallow{ (cwrite<1>(base + Indices * stride, slice<Indices * 2, 2>(value)), 0)... };
}
template <size_t N, typename T>
-CMT_INLINE void cscatter(complex<T>* base, size_t stride, cvec<T, N> value)
+CMT_INLINE void cscatter(complex<T>* base, size_t stride, const cvec<T, N>& value)
{
- return cscatter_helper<N, T>(base, stride, value, csizeseq<N>);
+ return cscatter_helper<N, T>(base, stride, value, csizeseq_t<N>());
}
template <size_t groupsize = 1, typename T, size_t N, typename IT>
-CMT_INLINE vec<T, N * 2 * groupsize> cgather(const complex<T>* base, vec<IT, N> offset)
+CMT_INLINE vec<T, N * 2 * groupsize> cgather(const complex<T>* base, const vec<IT, N>& offset)
{
- return gather_helper<2 * groupsize>(ptr_cast<T>(base), offset, csizeseq<N>);
+ return gather_helper<2 * groupsize>(ptr_cast<T>(base), offset, csizeseq_t<N>());
}
template <size_t groupsize = 1, typename T, size_t N, typename IT>
-CMT_INLINE void cscatter(complex<T>* base, vec<IT, N> offset, vec<T, N * 2 * groupsize> value)
+CMT_INLINE void cscatter(complex<T>* base, const vec<IT, N>& offset, vec<T, N * 2 * groupsize> value)
{
- return scatter_helper<2 * groupsize>(ptr_cast<T>(base), offset, value, csizeseq<N>);
+ return scatter_helper<2 * groupsize>(ptr_cast<T>(base), offset, value, csizeseq_t<N>());
}
template <typename T>
-KFR_INTRIN void transpose4x8(cvec<T, 8> z0, cvec<T, 8> z1, cvec<T, 8> z2, cvec<T, 8> z3, cvec<T, 4>& w0,
- cvec<T, 4>& w1, cvec<T, 4>& w2, cvec<T, 4>& w3, cvec<T, 4>& w4, cvec<T, 4>& w5,
- cvec<T, 4>& w6, cvec<T, 4>& w7)
+KFR_INTRIN void transpose4x8(const cvec<T, 8>& z0, const cvec<T, 8>& z1, const cvec<T, 8>& z2,
+ const cvec<T, 8>& z3, cvec<T, 4>& w0, cvec<T, 4>& w1, cvec<T, 4>& w2,
+ cvec<T, 4>& w3, cvec<T, 4>& w4, cvec<T, 4>& w5, cvec<T, 4>& w6, cvec<T, 4>& w7)
{
cvec<T, 16> a = concat(low(z0), low(z1), low(z2), low(z3));
cvec<T, 16> b = concat(high(z0), high(z1), high(z2), high(z3));
@@ -379,8 +389,9 @@ KFR_INTRIN void transpose4x8(cvec<T, 8> z0, cvec<T, 8> z1, cvec<T, 8> z2, cvec<T
}
template <typename T>
-KFR_INTRIN void transpose4x8(cvec<T, 4> w0, cvec<T, 4> w1, cvec<T, 4> w2, cvec<T, 4> w3, cvec<T, 4> w4,
- cvec<T, 4> w5, cvec<T, 4> w6, cvec<T, 4> w7, cvec<T, 8>& z0, cvec<T, 8>& z1,
+KFR_INTRIN void transpose4x8(const cvec<T, 4>& w0, const cvec<T, 4>& w1, const cvec<T, 4>& w2,
+ const cvec<T, 4>& w3, const cvec<T, 4>& w4, const cvec<T, 4>& w5,
+ const cvec<T, 4>& w6, const cvec<T, 4>& w7, cvec<T, 8>& z0, cvec<T, 8>& z1,
cvec<T, 8>& z2, cvec<T, 8>& z3)
{
cvec<T, 16> a = concat(w0, w1, w2, w3);
@@ -453,20 +464,20 @@ constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(csizes_t<indices...
: cos_using_table<T>(size, indices / 2 * step + start))...);
}
-template <typename T, size_t width, size_t size, size_t start, size_t step, bool inverse = false>
-constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle()
+template <typename T, size_t width, size_t size, size_t start, size_t step = 0, bool inverse = false>
+constexpr KFR_INTRIN cvec<T, width> fixed_twiddle()
{
- return get_fixed_twiddle_helper<T, width, size, start, step, inverse>(csizeseq<width * 2>);
+ return get_fixed_twiddle_helper<T, width, size, start, step, inverse>(csizeseq_t<width * 2>());
}
template <typename T, size_t width>
-constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle(size_t size, size_t start, size_t step = 0)
+constexpr KFR_INTRIN cvec<T, width> fixed_twiddle(size_t size, size_t start, size_t step = 0)
{
- return get_fixed_twiddle_helper<T, width>(csizeseq<width * 2>, start, step, size);
+ return get_fixed_twiddle_helper<T, width>(csizeseq_t<width * 2>(), start, step, size);
}
-template <typename T, size_t N, size_t size, size_t start, size_t step = 0, bool inverse = false>
-constexpr cvec<T, N> fixed_twiddle = get_fixed_twiddle<T, N, size, start, step, inverse>();
+// template <typename T, size_t N, size_t size, size_t start, size_t step = 0, bool inverse = false>
+// constexpr cvec<T, N> fixed_twiddle = get_fixed_twiddle<T, N, size, start, step, inverse>();
template <typename T, size_t N, bool inverse>
constexpr cvec<T, N> twiddleimagmask()
@@ -474,19 +485,19 @@ constexpr cvec<T, N> twiddleimagmask()
return inverse ? broadcast<N * 2, T>(-1, +1) : broadcast<N * 2, T>(+1, -1);
}
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wconversion"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wconversion")
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
template <typename T, size_t N>
-CMT_NOINLINE static vec<T, N> cossin_conj(vec<T, N> x)
+CMT_NOINLINE static vec<T, N> cossin_conj(const vec<T, N>& x)
{
return cconj(cossin(x));
}
template <size_t k, size_t size, bool inverse = false, typename T, size_t width>
-KFR_INTRIN vec<T, width> cmul_by_twiddle(vec<T, width> x)
+KFR_INTRIN vec<T, width> cmul_by_twiddle(const vec<T, width>& x)
{
constexpr size_t kk = (inverse ? size - k : k) % size;
constexpr T isqrt2 = static_cast<T>(0.70710678118654752440084436210485);
@@ -524,12 +535,12 @@ KFR_INTRIN vec<T, width> cmul_by_twiddle(vec<T, width> x)
}
else
{
- return cmul(x, resize<width>(fixed_twiddle<T, 1, size, kk>));
+ return cmul(x, resize<width>(fixed_twiddle<T, 1, size, kk>()));
}
}
template <size_t N, typename T>
-KFR_INTRIN void butterfly2(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N>& w0, cvec<T, N>& w1)
+KFR_INTRIN void butterfly2(const cvec<T, N>& a0, const cvec<T, N>& a1, cvec<T, N>& w0, cvec<T, N>& w1)
{
w0 = a0 + a1;
w1 = a0 - a1;
@@ -542,8 +553,9 @@ KFR_INTRIN void butterfly2(cvec<T, N>& a0, cvec<T, N>& a1)
}
template <size_t N, bool inverse = false, typename T>
-KFR_INTRIN void butterfly4(cfalse_t /*split_format*/, cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2,
- cvec<T, N> a3, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
+KFR_INTRIN void butterfly4(cfalse_t /*split_format*/, const cvec<T, N>& a0, const cvec<T, N>& a1,
+ const cvec<T, N>& a2, const cvec<T, N>& a3, cvec<T, N>& w0, cvec<T, N>& w1,
+ cvec<T, N>& w2, cvec<T, N>& w3)
{
cvec<T, N> sum02, sum13, diff02, diff13;
cvec<T, N * 2> a01, a23, sum0213, diff0213;
@@ -575,8 +587,9 @@ KFR_INTRIN void butterfly4(cfalse_t /*split_format*/, cvec<T, N> a0, cvec<T, N>
}
template <size_t N, bool inverse = false, typename T>
-KFR_INTRIN void butterfly4(ctrue_t /*split_format*/, cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2,
- cvec<T, N> a3, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
+KFR_INTRIN void butterfly4(ctrue_t /*split_format*/, const cvec<T, N>& a0, const cvec<T, N>& a1,
+ const cvec<T, N>& a2, const cvec<T, N>& a3, cvec<T, N>& w0, cvec<T, N>& w1,
+ cvec<T, N>& w2, cvec<T, N>& w3)
{
vec<T, N> re0, im0, re1, im1, re2, im2, re3, im3;
vec<T, N> wre0, wim0, wre1, wim1, wre2, wim2, wre3, wim3;
@@ -601,15 +614,16 @@ KFR_INTRIN void butterfly4(ctrue_t /*split_format*/, cvec<T, N> a0, cvec<T, N> a
}
template <size_t N, bool inverse = false, typename T>
-KFR_INTRIN void butterfly8(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> a4,
- cvec<T, N> a5, cvec<T, N> a6, cvec<T, N> a7, cvec<T, N>& w0, cvec<T, N>& w1,
+KFR_INTRIN void butterfly8(const cvec<T, N>& a0, const cvec<T, N>& a1, const cvec<T, N>& a2,
+ const cvec<T, N>& a3, const cvec<T, N>& a4, const cvec<T, N>& a5,
+ const cvec<T, N>& a6, const cvec<T, N>& a7, cvec<T, N>& w0, cvec<T, N>& w1,
cvec<T, N>& w2, cvec<T, N>& w3, cvec<T, N>& w4, cvec<T, N>& w5, cvec<T, N>& w6,
cvec<T, N>& w7)
{
cvec<T, N> b0 = a0, b2 = a2, b4 = a4, b6 = a6;
- butterfly4<N, inverse>(cbool<false>, b0, b2, b4, b6, b0, b2, b4, b6);
+ butterfly4<N, inverse>(cfalse, b0, b2, b4, b6, b0, b2, b4, b6);
cvec<T, N> b1 = a1, b3 = a3, b5 = a5, b7 = a7;
- butterfly4<N, inverse>(cbool<false>, b1, b3, b5, b7, b1, b3, b5, b7);
+ butterfly4<N, inverse>(cfalse, b1, b3, b5, b7, b1, b3, b5, b7);
w0 = b0 + b1;
w4 = b0 - b1;
@@ -637,7 +651,7 @@ KFR_INTRIN void butterfly8(cvec<T, 2>& a01, cvec<T, 2>& a23, cvec<T, 2>& a45, cv
{
cvec<T, 2> b01 = a01, b23 = a23, b45 = a45, b67 = a67;
- butterfly4<2, inverse>(cbool<false>, b01, b23, b45, b67, b01, b23, b45, b67);
+ butterfly4<2, inverse>(cfalse, b01, b23, b45, b67, b01, b23, b45, b67);
cvec<T, 2> b02, b13, b46, b57;
@@ -645,8 +659,8 @@ KFR_INTRIN void butterfly8(cvec<T, 2>& a01, cvec<T, 2>& a23, cvec<T, 2>& a45, cv
cvec<T, 8> b02461357 = concat(even<2>(b01234567), odd<2>(b01234567));
split(b02461357, b02, b46, b13, b57);
- b13 = cmul(b13, fixed_twiddle<T, 2, 8, 0, 1, inverse>);
- b57 = cmul(b57, fixed_twiddle<T, 2, 8, 2, 1, inverse>);
+ b13 = cmul(b13, fixed_twiddle<T, 2, 8, 0, 1, inverse>());
+ b57 = cmul(b57, fixed_twiddle<T, 2, 8, 2, 1, inverse>());
a01 = b02 + b13;
a23 = b46 + b57;
a45 = b02 - b13;
@@ -669,13 +683,13 @@ KFR_INTRIN void butterfly32(cvec<T, 32>& v32)
split(v32, w0, w1, w2, w3, w4, w5, w6, w7);
butterfly8<4, inverse>(w0, w1, w2, w3, w4, w5, w6, w7);
- w1 = cmul(w1, fixed_twiddle<T, 4, 32, 0, 1, inverse>);
- w2 = cmul(w2, fixed_twiddle<T, 4, 32, 0, 2, inverse>);
- w3 = cmul(w3, fixed_twiddle<T, 4, 32, 0, 3, inverse>);
- w4 = cmul(w4, fixed_twiddle<T, 4, 32, 0, 4, inverse>);
- w5 = cmul(w5, fixed_twiddle<T, 4, 32, 0, 5, inverse>);
- w6 = cmul(w6, fixed_twiddle<T, 4, 32, 0, 6, inverse>);
- w7 = cmul(w7, fixed_twiddle<T, 4, 32, 0, 7, inverse>);
+ w1 = cmul(w1, fixed_twiddle<T, 4, 32, 0, 1, inverse>());
+ w2 = cmul(w2, fixed_twiddle<T, 4, 32, 0, 2, inverse>());
+ w3 = cmul(w3, fixed_twiddle<T, 4, 32, 0, 3, inverse>());
+ w4 = cmul(w4, fixed_twiddle<T, 4, 32, 0, 4, inverse>());
+ w5 = cmul(w5, fixed_twiddle<T, 4, 32, 0, 5, inverse>());
+ w6 = cmul(w6, fixed_twiddle<T, 4, 32, 0, 6, inverse>());
+ w7 = cmul(w7, fixed_twiddle<T, 4, 32, 0, 7, inverse>());
cvec<T, 8> z0, z1, z2, z3;
transpose4x8(w0, w1, w2, w3, w4, w5, w6, w7, z0, z1, z2, z3);
@@ -707,7 +721,7 @@ KFR_INTRIN void butterfly2(cvec<T, N * 2>& a01)
}
template <size_t N, bool inverse = false, bool split_format = false, typename T>
-KFR_INTRIN void apply_twiddle(cvec<T, N> a1, cvec<T, N> tw1, cvec<T, N>& w1)
+KFR_INTRIN void apply_twiddle(const cvec<T, N>& a1, const cvec<T, N>& tw1, cvec<T, N>& w1)
{
if (split_format)
{
@@ -723,18 +737,20 @@ KFR_INTRIN void apply_twiddle(cvec<T, N> a1, cvec<T, N> tw1, cvec<T, N>& w1)
}
else
{
- cvec<T, N> b1 = a1 * dupeven(tw1);
- a1 = swap<2>(a1);
+ const cvec<T, N> b1 = a1 * dupeven(tw1);
+ const cvec<T, N> a1_ = swap<2>(a1);
+ cvec<T, N> tw1_ = tw1;
if (inverse)
- tw1 = -(tw1);
- w1 = subadd(b1, a1 * dupodd(tw1));
+ tw1_ = -(tw1_);
+ w1 = subadd(b1, a1_ * dupodd(tw1_));
}
}
template <size_t N, bool inverse = false, bool split_format = false, typename T>
-KFR_INTRIN void apply_twiddles4(cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> tw1, cvec<T, N> tw2,
- cvec<T, N> tw3, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
+KFR_INTRIN void apply_twiddles4(const cvec<T, N>& a1, const cvec<T, N>& a2, const cvec<T, N>& a3,
+ const cvec<T, N>& tw1, const cvec<T, N>& tw2, const cvec<T, N>& tw3,
+ cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
{
apply_twiddle<N, inverse, split_format>(a1, tw1, w1);
apply_twiddle<N, inverse, split_format>(a2, tw2, w2);
@@ -743,14 +759,16 @@ KFR_INTRIN void apply_twiddles4(cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cve
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void apply_twiddles4(cvec<T, N>& __restrict a1, cvec<T, N>& __restrict a2,
- cvec<T, N>& __restrict a3, cvec<T, N> tw1, cvec<T, N> tw2, cvec<T, N> tw3)
+ cvec<T, N>& __restrict a3, const cvec<T, N>& tw1, const cvec<T, N>& tw2,
+ const cvec<T, N>& tw3)
{
apply_twiddles4<N, inverse>(a1, a2, a3, tw1, tw2, tw3, a1, a2, a3);
}
template <size_t N, bool inverse = false, typename T, typename = u8[N - 1]>
KFR_INTRIN void apply_twiddles4(cvec<T, N>& __restrict a1, cvec<T, N>& __restrict a2,
- cvec<T, N>& __restrict a3, cvec<T, 1> tw1, cvec<T, 1> tw2, cvec<T, 1> tw3)
+ cvec<T, N>& __restrict a3, const cvec<T, 1>& tw1, const cvec<T, 1>& tw2,
+ const cvec<T, 1>& tw3)
{
apply_twiddles4<N, inverse>(a1, a2, a3, resize<N * 2>(tw1), resize<N * 2>(tw2), resize<N * 2>(tw3));
}
@@ -800,9 +818,9 @@ KFR_INTRIN void apply_twiddles4(cvec<T, N * 4>& __restrict a0123)
cvec<T, N> a3;
split(a0123, a0, a1, a2, a3);
- cvec<T, N> tw1 = fixed_twiddle<T, N, 64, n2 * nnstep * 1, nnstep * 1, inverse>,
- tw2 = fixed_twiddle<T, N, 64, n2 * nnstep * 2, nnstep * 2, inverse>,
- tw3 = fixed_twiddle<T, N, 64, n2 * nnstep * 3, nnstep * 3, inverse>;
+ cvec<T, N> tw1 = fixed_twiddle<T, N, 64, n2 * nnstep * 1, nnstep * 1, inverse>(),
+ tw2 = fixed_twiddle<T, N, 64, n2 * nnstep * 2, nnstep * 2, inverse>(),
+ tw3 = fixed_twiddle<T, N, 64, n2 * nnstep * 3, nnstep * 3, inverse>();
apply_twiddles4<N>(a1, a2, a3, tw1, tw2, tw3);
@@ -984,23 +1002,23 @@ KFR_INTRIN void butterfly16_multi_flip(complex<T>* out, const complex<T>* in)
w3 = digitreverse4<2>(w3);
transpose4(w0, w1, w2, w3);
- cwrite<16>(out + index * 64 + 16 * 0, cmul(w0, fixed_twiddle<T, 16, 256, 0, index * 4 + 0, inverse>));
- cwrite<16>(out + index * 64 + 16 * 1, cmul(w1, fixed_twiddle<T, 16, 256, 0, index * 4 + 1, inverse>));
- cwrite<16>(out + index * 64 + 16 * 2, cmul(w2, fixed_twiddle<T, 16, 256, 0, index * 4 + 2, inverse>));
- cwrite<16>(out + index * 64 + 16 * 3, cmul(w3, fixed_twiddle<T, 16, 256, 0, index * 4 + 3, inverse>));
+ cwrite<16>(out + index * 64 + 16 * 0, cmul(w0, fixed_twiddle<T, 16, 256, 0, index * 4 + 0, inverse>()));
+ cwrite<16>(out + index * 64 + 16 * 1, cmul(w1, fixed_twiddle<T, 16, 256, 0, index * 4 + 1, inverse>()));
+ cwrite<16>(out + index * 64 + 16 * 2, cmul(w2, fixed_twiddle<T, 16, 256, 0, index * 4 + 2, inverse>()));
+ cwrite<16>(out + index * 64 + 16 * 3, cmul(w3, fixed_twiddle<T, 16, 256, 0, index * 4 + 3, inverse>()));
}
template <size_t n2, size_t nnstep, size_t N, typename T>
KFR_INTRIN void apply_twiddles2(cvec<T, N>& a1)
{
- cvec<T, N> tw1 = fixed_twiddle<T, N, 64, n2 * nnstep * 1, nnstep * 1>;
+ cvec<T, N> tw1 = fixed_twiddle<T, N, 64, n2 * nnstep * 1, nnstep * 1>();
a1 = cmul(a1, tw1);
}
template <size_t N, bool inverse = false, typename T>
-KFR_INTRIN void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N>& w00, cvec<T, N>& w01,
- cvec<T, N>& w02)
+KFR_INTRIN void butterfly3(const cvec<T, N>& a00, const cvec<T, N>& a01, const cvec<T, N>& a02,
+ cvec<T, N>& w00, cvec<T, N>& w01, cvec<T, N>& w02)
{
constexpr cvec<T, N> tw3r1 = static_cast<T>(-0.5);
constexpr cvec<T, N> tw3i1 =
@@ -1025,9 +1043,9 @@ KFR_INTRIN void butterfly3(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2)
}
template <size_t N, bool inverse = false, typename T>
-KFR_INTRIN void butterfly6(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> a4,
- cvec<T, N> a5, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3,
- cvec<T, N>& w4, cvec<T, N>& w5)
+KFR_INTRIN void butterfly6(const cvec<T, N>& a0, const cvec<T, N>& a1, const cvec<T, N>& a2,
+ const cvec<T, N>& a3, const cvec<T, N>& a4, const cvec<T, N>& a5, cvec<T, N>& w0,
+ cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3, cvec<T, N>& w4, cvec<T, N>& w5)
{
cvec<T, N* 2> a03 = concat(a0, a3);
cvec<T, N* 2> a25 = concat(a2, a5);
@@ -1056,8 +1074,9 @@ KFR_INTRIN void butterfly6(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cvec<
}
template <size_t N, bool inverse = false, typename T>
-KFR_INTRIN void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04,
- cvec<T, N> a05, cvec<T, N> a06, cvec<T, N>& w00, cvec<T, N>& w01, cvec<T, N>& w02,
+KFR_INTRIN void butterfly7(const cvec<T, N>& a00, const cvec<T, N>& a01, const cvec<T, N>& a02,
+ const cvec<T, N>& a03, const cvec<T, N>& a04, const cvec<T, N>& a05,
+ const cvec<T, N>& a06, cvec<T, N>& w00, cvec<T, N>& w01, cvec<T, N>& w02,
cvec<T, N>& w03, cvec<T, N>& w04, cvec<T, N>& w05, cvec<T, N>& w06)
{
constexpr cvec<T, N> tw7r1 = static_cast<T>(0.623489801858733530525004884);
@@ -1102,9 +1121,9 @@ KFR_INTRIN void butterfly7(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cvec<
}
template <size_t N, bool inverse = false, typename T>
-KFR_INTRIN void butterfly5(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04,
- cvec<T, N>& w00, cvec<T, N>& w01, cvec<T, N>& w02, cvec<T, N>& w03,
- cvec<T, N>& w04)
+KFR_INTRIN void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, const cvec<T, N>& a02,
+ const cvec<T, N>& a03, const cvec<T, N>& a04, cvec<T, N>& w00, cvec<T, N>& w01,
+ cvec<T, N>& w02, cvec<T, N>& w03, cvec<T, N>& w04)
{
constexpr cvec<T, N> tw5r1 = static_cast<T>(0.30901699437494742410229341718);
constexpr cvec<T, N> tw5i1 =
@@ -1132,10 +1151,12 @@ KFR_INTRIN void butterfly5(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<
}
template <size_t N, bool inverse = false, typename T>
-KFR_INTRIN void butterfly10(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> a4,
- cvec<T, N> a5, cvec<T, N> a6, cvec<T, N> a7, cvec<T, N> a8, cvec<T, N> a9,
- cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3, cvec<T, N>& w4,
- cvec<T, N>& w5, cvec<T, N>& w6, cvec<T, N>& w7, cvec<T, N>& w8, cvec<T, N>& w9)
+KFR_INTRIN void butterfly10(const cvec<T, N>& a0, const cvec<T, N>& a1, const cvec<T, N>& a2,
+ const cvec<T, N>& a3, const cvec<T, N>& a4, const cvec<T, N>& a5,
+ const cvec<T, N>& a6, const cvec<T, N>& a7, const cvec<T, N>& a8,
+ const cvec<T, N>& a9, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2,
+ cvec<T, N>& w3, cvec<T, N>& w4, cvec<T, N>& w5, cvec<T, N>& w6, cvec<T, N>& w7,
+ cvec<T, N>& w8, cvec<T, N>& w9)
{
cvec<T, N* 2> a05 = concat(a0, a5);
cvec<T, N* 2> a27 = concat(a2, a7);
@@ -1170,64 +1191,69 @@ KFR_INTRIN void butterfly10(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T,
}
template <bool inverse, typename T, size_t N>
-KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N>& out0, vec<T, N>& out1)
+KFR_INTRIN void butterfly(cbool_t<inverse>, const vec<T, N>& in0, const vec<T, N>& in1, vec<T, N>& out0,
+ vec<T, N>& out1)
{
butterfly2<N / 2>(in0, in1, out0, out1);
}
template <bool inverse, typename T, size_t N>
-KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N>& out0,
- vec<T, N>& out1, vec<T, N>& out2)
+KFR_INTRIN void butterfly(cbool_t<inverse>, const vec<T, N>& in0, const vec<T, N>& in1, const vec<T, N>& in2,
+ vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2)
{
butterfly3<N / 2, inverse>(in0, in1, in2, out0, out1, out2);
}
template <bool inverse, typename T, size_t N>
-KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
- vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3)
+KFR_INTRIN void butterfly(cbool_t<inverse>, const vec<T, N>& in0, const vec<T, N>& in1, const vec<T, N>& in2,
+ const vec<T, N>& in3, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2,
+ vec<T, N>& out3)
{
butterfly4<N / 2, inverse>(cfalse, in0, in1, in2, in3, out0, out1, out2, out3);
}
template <bool inverse, typename T, size_t N>
-KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
- vec<T, N> in4, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3,
- vec<T, N>& out4)
+KFR_INTRIN void butterfly(cbool_t<inverse>, const vec<T, N>& in0, const vec<T, N>& in1, const vec<T, N>& in2,
+ const vec<T, N>& in3, const vec<T, N>& in4, vec<T, N>& out0, vec<T, N>& out1,
+ vec<T, N>& out2, vec<T, N>& out3, vec<T, N>& out4)
{
butterfly5<N / 2, inverse>(in0, in1, in2, in3, in4, out0, out1, out2, out3, out4);
}
template <bool inverse, typename T, size_t N>
-KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
- vec<T, N> in4, vec<T, N> in5, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2,
- vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5)
+KFR_INTRIN void butterfly(cbool_t<inverse>, const vec<T, N>& in0, const vec<T, N>& in1, const vec<T, N>& in2,
+ const vec<T, N>& in3, const vec<T, N>& in4, const vec<T, N>& in5, vec<T, N>& out0,
+ vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5)
{
butterfly6<N / 2, inverse>(in0, in1, in2, in3, in4, in5, out0, out1, out2, out3, out4, out5);
}
template <bool inverse, typename T, size_t N>
-KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
- vec<T, N> in4, vec<T, N> in5, vec<T, N> in6, vec<T, N>& out0, vec<T, N>& out1,
- vec<T, N>& out2, vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5, vec<T, N>& out6)
+KFR_INTRIN void butterfly(cbool_t<inverse>, const vec<T, N>& in0, const vec<T, N>& in1, const vec<T, N>& in2,
+ const vec<T, N>& in3, const vec<T, N>& in4, const vec<T, N>& in5,
+ const vec<T, N>& in6, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2,
+ vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5, vec<T, N>& out6)
{
butterfly7<N / 2, inverse>(in0, in1, in2, in3, in4, in5, in6, out0, out1, out2, out3, out4, out5, out6);
}
template <bool inverse, typename T, size_t N>
-KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
- vec<T, N> in4, vec<T, N> in5, vec<T, N> in6, vec<T, N> in7, vec<T, N>& out0,
- vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5,
- vec<T, N>& out6, vec<T, N>& out7)
+KFR_INTRIN void butterfly(cbool_t<inverse>, const vec<T, N>& in0, const vec<T, N>& in1, const vec<T, N>& in2,
+ const vec<T, N>& in3, const vec<T, N>& in4, const vec<T, N>& in5,
+ const vec<T, N>& in6, const vec<T, N>& in7, vec<T, N>& out0, vec<T, N>& out1,
+ vec<T, N>& out2, vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5, vec<T, N>& out6,
+ vec<T, N>& out7)
{
butterfly8<N / 2, inverse>(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, out5,
out6, out7);
}
template <bool inverse, typename T, size_t N>
-KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
- vec<T, N> in4, vec<T, N> in5, vec<T, N> in6, vec<T, N> in7, vec<T, N> in8,
- vec<T, N> in9, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3,
- vec<T, N>& out4, vec<T, N>& out5, vec<T, N>& out6, vec<T, N>& out7, vec<T, N>& out8,
- vec<T, N>& out9)
+KFR_INTRIN void butterfly(cbool_t<inverse>, const vec<T, N>& in0, const vec<T, N>& in1, const vec<T, N>& in2,
+ const vec<T, N>& in3, const vec<T, N>& in4, const vec<T, N>& in5,
+ const vec<T, N>& in6, const vec<T, N>& in7, const vec<T, N>& in8,
+ const vec<T, N>& in9, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2,
+ vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5, vec<T, N>& out6, vec<T, N>& out7,
+ vec<T, N>& out8, vec<T, N>& out9)
{
butterfly10<N / 2, inverse>(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, out0, out1, out2, out3,
out4, out5, out6, out7, out8, out9);
}
-template <bool transposed, typename T, size_t... N, size_t Nout = csum(csizes<N...>)>
+template <bool transposed, typename T, size_t... N, size_t Nout = csum<size_t, N...>()>
KFR_INTRIN void cread_transposed(cbool_t<transposed>, const complex<T>* ptr, vec<T, N>&... w)
{
vec<T, Nout> temp = read<Nout>(ptr_cast<T>(ptr));
@@ -1255,7 +1281,7 @@ KFR_INTRIN void cread_transposed(cbool_t<true>, const complex<f32>* ptr, cvec<f3
w4 = cgather<4, 5>(ptr + 4);
}
-template <bool transposed, typename T, size_t... N, size_t Nout = csum(csizes<N...>)>
+template <bool transposed, typename T, size_t... N, size_t Nout = csum<size_t, N...>()>
KFR_INTRIN void cwrite_transposed(cbool_t<transposed>, complex<T>* ptr, vec<T, N>... args)
{
auto temp = concat(args...);
@@ -1265,12 +1291,12 @@ KFR_INTRIN void cwrite_transposed(cbool_t<transposed>, complex<T>* ptr, vec<T, N
}
template <size_t I, size_t radix, typename T, size_t N, size_t width = N / 2>
-KFR_INTRIN vec<T, N> mul_tw(cbool_t<false>, vec<T, N> x, const complex<T>* twiddle)
+KFR_INTRIN vec<T, N> mul_tw(cbool_t<false>, const vec<T, N>& x, const complex<T>* twiddle)
{
return I == 0 ? x : cmul(x, cread<width>(twiddle + width * (I - 1)));
}
template <size_t I, size_t radix, typename T, size_t N, size_t width = N / 2>
-KFR_INTRIN vec<T, N> mul_tw(cbool_t<true>, vec<T, N> x, const complex<T>* twiddle)
+KFR_INTRIN vec<T, N> mul_tw(cbool_t<true>, const vec<T, N>& x, const complex<T>* twiddle)
{
return I == 0 ? x : cmul_conj(x, cread<width>(twiddle + width * (I - 1)));
}
@@ -1282,13 +1308,14 @@ KFR_INTRIN void butterfly_helper(csizes_t<I...>, size_t i, csize_t<width>, csize
{
carray<cvec<T, width>, radix> inout;
- swallow{ (inout.get(csize<I>) = cread<width>(in + i + stride * I))... };
+ swallow{ (inout.get(csize_t<I>()) = cread<width>(in + i + stride * I))... };
- butterfly(cbool<inverse>, inout.template get<I>()..., inout.template get<I>()...);
+ butterfly(cbool_t<inverse>(), inout.template get<I>()..., inout.template get<I>()...);
- swallow{ (cwrite<width>(out + i + stride * I,
- mul_tw<I, radix>(cbool<inverse>, inout.template get<I>(), tw + i * (radix - 1))),
- 0)... };
+ swallow{ (
+ cwrite<width>(out + i + stride * I,
+ mul_tw<I, radix>(cbool_t<inverse>(), inout.template get<I>(), tw + i * (radix - 1))),
+ 0)... };
}
// Final
@@ -1299,17 +1326,17 @@ KFR_INTRIN void butterfly_helper(csizes_t<I...>, size_t i, csize_t<width>, csize
carray<cvec<T, width>, radix> inout;
// swallow{ ( inout.get( csize<I> ) = infn( i, I, cvec<T, width>( ) ) )... };
- cread_transposed(cbool<true>, in + i * radix, inout.template get<I>()...);
+ cread_transposed(ctrue, in + i * radix, inout.template get<I>()...);
- butterfly(cbool<inverse>, inout.template get<I>()..., inout.template get<I>()...);
+ butterfly(cbool_t<inverse>(), inout.template get<I>()..., inout.template get<I>()...);
- swallow{ (cwrite<width>(out + i + stride * I, inout.get(csize<I>)), 0)... };
+ swallow{ (cwrite<width>(out + i + stride * I, inout.get(csize_t<I>())), 0)... };
}
template <size_t width, size_t radix, typename... Args>
KFR_INTRIN void butterfly(size_t i, csize_t<width>, csize_t<radix>, Args&&... args)
{
- butterfly_helper(csizeseq<radix>, i, csize<width>, csize<radix>, std::forward<Args>(args)...);
+ butterfly_helper(csizeseq_t<radix>(), i, csize_t<width>(), csize_t<radix>(), std::forward<Args>(args)...);
}
template <typename... Args>
@@ -1321,8 +1348,8 @@ KFR_INTRIN void butterfly_cycle(size_t& i, size_t count, csize_t<width>, Args&&.
{
CMT_LOOP_NOUNROLL
for (; i < count / width * width; i += width)
- butterfly(i, csize<width>, std::forward<Args>(args)...);
- butterfly_cycle(i, count, csize<width / 2>, std::forward<Args>(args)...);
+ butterfly(i, csize_t<width>(), std::forward<Args>(args)...);
+ butterfly_cycle(i, count, csize_t<width / 2>(), std::forward<Args>(args)...);
}
template <size_t width, typename... Args>
@@ -1330,7 +1357,7 @@ KFR_INTRIN void butterflies(size_t count, csize_t<width>, Args&&... args)
{
CMT_ASSUME(count > 0);
size_t i = 0;
- butterfly_cycle(i, count, csize<width>, std::forward<Args>(args)...);
+ butterfly_cycle(i, count, csize_t<width>(), std::forward<Args>(args)...);
}
template <typename T, bool inverse, typename Tstride>
@@ -1376,7 +1403,7 @@ KFR_INTRIN void generic_butterfly_cycle(csize_t<width>, size_t radix, cbool_t<in
reverse<2>(sum1));
}
}
- generic_butterfly_cycle(csize<width / 2>, radix, cbool<inverse>, out, in, ostride, halfradix,
+ generic_butterfly_cycle(csize_t<width / 2>(), radix, cbool_t<inverse>(), out, in, ostride, halfradix,
halfradix_sqr, twiddle, i);
}
@@ -1406,8 +1433,8 @@ KFR_INTRIN void generic_butterfly_w(size_t radix, cbool_t<inverse>, complex<T>*
CMT_ASSUME(halfradix > 0);
size_t i = 0;
- generic_butterfly_cycle(csize<width>, radix, cbool<inverse>, out, in, ostride, halfradix, halfradix_sqr,
- twiddle, i);
+ generic_butterfly_cycle(csize_t<width>(), radix, cbool_t<inverse>(), out, in, ostride, halfradix,
+ halfradix_sqr, twiddle, i);
}
template <typename T, bool inverse, typename Tstride = csize_t<1>>
@@ -1419,14 +1446,14 @@ KFR_INTRIN void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* ou
builtin_memcpy(temp, in, sizeof(complex<T>) * radix);
in = temp;
}
- constexpr size_t width = vector_width<T, cpu_t::native>;
+ constexpr size_t width = platform<T>::vector_width;
- cswitch(csizes<11>, radix,
+ cswitch(csizes_t<11>(), radix,
[&](auto radix_) CMT_INLINE_LAMBDA {
- generic_butterfly_w<width>(decltype(radix_)(), cbool<inverse>, out, in, twiddle, ostride);
+ generic_butterfly_w<width>(decltype(radix_)(), cbool_t<inverse>(), out, in, twiddle, ostride);
},
[&]() CMT_INLINE_LAMBDA {
- generic_butterfly_w<width>(radix, cbool<inverse>, out, in, twiddle, ostride);
+ generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride);
});
}
@@ -1437,13 +1464,13 @@ template <typename T, size_t N>
constexpr cvec<T, N> cmask0088 = broadcast<N * 4, T>(T(), T(), -T(), -T());
template <bool A = false, typename T, size_t N>
-KFR_INTRIN void cbitreverse_write(complex<T>* dest, vec<T, N> x)
+KFR_INTRIN void cbitreverse_write(complex<T>* dest, const vec<T, N>& x)
{
cwrite<N / 2, A>(dest, bitreverse<2>(x));
}
template <bool A = false, typename T, size_t N>
-KFR_INTRIN void cdigitreverse4_write(complex<T>* dest, vec<T, N> x)
+KFR_INTRIN void cdigitreverse4_write(complex<T>* dest, const vec<T, N>& x)
{
cwrite<N / 2, A>(dest, digitreverse4<2>(x));
}
@@ -1471,7 +1498,7 @@ KFR_INTRIN cvec<f64, 16> cdigitreverse4_read<16, false, f64>(const complex<f64>*
cread<1>(src + 3), cread<1>(src + 7), cread<1>(src + 11), cread<1>(src + 15));
}
template <>
-KFR_INTRIN void cdigitreverse4_write<false, f64, 32>(complex<f64>* dest, vec<f64, 32> x)
+KFR_INTRIN void cdigitreverse4_write<false, f64, 32>(complex<f64>* dest, const vec<f64, 32>& x)
{
cwrite<1>(dest, part<16, 0>(x));
cwrite<1>(dest + 4, part<16, 1>(x));
@@ -1496,3 +1523,5 @@ KFR_INTRIN void cdigitreverse4_write<false, f64, 32>(complex<f64>* dest, vec<f64
#endif
}
}
+
+CMT_PRAGMA_MSVC(warning(pop))
diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp
@@ -108,7 +108,7 @@ struct biquad_block
vec<T, filters> b2;
constexpr biquad_block() noexcept : a1(0), a2(0), b0(1), b1(0), b2(0) {}
- constexpr biquad_block(const biquad_params<T>* bq, size_t count) noexcept
+ CMT_GNU_CONSTEXPR biquad_block(const biquad_params<T>* bq, size_t count) noexcept
{
count = count > filters ? filters : count;
for (size_t i = 0; i < count; i++)
@@ -161,7 +161,7 @@ struct expression_biquads : public expression<E1>
return out;
}
KFR_SINTRIN vec<T, filters> process(const biquad_block<T, filters>& bq, biquad_state<T, filters>& state,
- vec<T, filters> in)
+ const vec<T, filters>& in)
{
const vec<T, filters> out = bq.b0 * in + state.s1;
state.s1 = state.s2 + bq.b1 * in - bq.a1 * out;
@@ -301,7 +301,7 @@ CMT_INLINE internal::expression_biquads_zl<filters, T, internal::arg<E1>> biquad
template <typename T, typename E1>
CMT_INLINE expression_pointer<T> biquad_zl(const biquad_params<T>* bq, size_t count, E1&& e1)
{
- return cswitch(csizes<1, 2, 4, 8, 16, 32, 64>, next_poweroftwo(count),
+ return cswitch(csizes_t<1, 2, 4, 8, 16, 32, 64>(), next_poweroftwo(count),
[&](auto x) {
constexpr size_t filters = x;
return to_pointer(internal::expression_biquads_zl<filters, T, internal::arg<E1>>(
diff --git a/include/kfr/dsp/delay.hpp b/include/kfr/dsp/delay.hpp
@@ -103,7 +103,7 @@ struct expression_delay<1, E> : expression<E>
* @endcode
*/
template <size_t samples = 1, typename E1>
-CMT_INLINE internal::expression_delay<samples, E1> delay(E1&& e1, csize_t<samples> = csize<samples>)
+CMT_INLINE internal::expression_delay<samples, E1> delay(E1&& e1, csize_t<samples> = csize_t<samples>())
{
static_assert(samples >= 1 && samples < 1024, "");
return internal::expression_delay<samples, E1>(std::forward<E1>(e1));
diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp
@@ -61,7 +61,7 @@ struct expression_short_fir : expression<E1>
vec<T, N> in = this->argument_first(cinput, index, x);
vec<T, N> out = in * taps[0];
- cfor(csize<1>, csize<tapcount>,
+ cfor(csize_t<1>(), csize_t<tapcount>(),
[&](auto I) { out = out + concat_and_slice<tapcount - 1 - I, N>(delayline, in) * taps[I]; });
delayline = concat_and_slice<N, tapcount - 1>(delayline, in);
diff --git a/include/kfr/dsp/mixdown.hpp b/include/kfr/dsp/mixdown.hpp
@@ -45,7 +45,7 @@ struct stereo_matrix
template <typename T, size_t N>
CMT_INLINE vec<vec<T, 2>, N> operator()(const vec<vec<T, 2>, N>& x) const
{
- return process(x, csizeseq<N>);
+ return process(x, csizeseq_t<N>());
}
template <typename T, size_t N, size_t... indices>
CMT_INLINE vec<vec<T, 2>, N> process(const vec<vec<T, 2>, N>& x, csizes_t<indices...>) const
@@ -56,8 +56,16 @@ struct stereo_matrix
};
}
-constexpr f64x2x2 matrix_sum_diff() { return { f64x2{ 1, 1 }, f64x2{ 1, -1 } }; }
-constexpr f64x2x2 matrix_halfsum_halfdiff() { return { f64x2{ 0.5, 0.5 }, f64x2{ 0.5, -0.5 } }; }
+template <int = 0>
+CMT_GNU_CONSTEXPR f64x2x2 matrix_sum_diff()
+{
+ return { f64x2{ 1, 1 }, f64x2{ 1, -1 } };
+}
+template <int = 0>
+CMT_GNU_CONSTEXPR f64x2x2 matrix_halfsum_halfdiff()
+{
+ return { f64x2{ 0.5, 0.5 }, f64x2{ 0.5, -0.5 } };
+}
/**
* @brief Returns template expression that returns the vector of length 2 containing mix of the left and right
diff --git a/include/kfr/dsp/oscillators.hpp b/include/kfr/dsp/oscillators.hpp
@@ -34,7 +34,8 @@ namespace kfr
template <typename T = fbase>
auto jaehne(identity<T> magn, size_t size)
{
- return truncate(magn * sin(c_pi<T, 1, 2> * sqr(linspace(T(0), T(size), size, false)) / size), size);
+ return truncate(magn * sin(constants<T>::pi_s(1, 2) * sqr(linspace(T(0), T(size), size, false)) / size),
+ size);
}
template <typename T = fbase>
@@ -60,78 +61,78 @@ auto phasor(identity<T> frequency, identity<T> sample_rate)
namespace intrinsics
{
template <typename T>
-KFR_SINTRIN T rawsine(T x)
+KFR_SINTRIN T rawsine(const T& x)
{
- return intrinsics::fastsin(x * c_pi<T, 2>);
+ return intrinsics::fastsin(x * constants<T>::pi_s(2));
}
template <typename T>
-KFR_SINTRIN T sinenorm(T x)
+KFR_SINTRIN T sinenorm(const T& x)
{
return intrinsics::rawsine(fract(x));
}
template <typename T>
-KFR_SINTRIN T sine(T x)
+KFR_SINTRIN T sine(const T& x)
{
- return intrinsics::sinenorm(c_recip_pi<T, 1, 2> * x);
+ return intrinsics::sinenorm(constants<T>::recip_pi_s(1, 2) * x);
}
template <typename T>
-KFR_SINTRIN T rawsquare(T x)
+KFR_SINTRIN T rawsquare(const T& x)
{
return select(x < T(0.5), T(1), -T(1));
}
template <typename T>
-KFR_SINTRIN T squarenorm(T x)
+KFR_SINTRIN T squarenorm(const T& x)
{
return intrinsics::rawsquare(fract(x));
}
template <typename T>
-KFR_SINTRIN T square(T x)
+KFR_SINTRIN T square(const T& x)
{
- return intrinsics::squarenorm(c_recip_pi<T, 1, 2> * x);
+ return intrinsics::squarenorm(constants<T>::recip_pi_s(1, 2) * x);
}
template <typename T>
-KFR_SINTRIN T rawsawtooth(T x)
+KFR_SINTRIN T rawsawtooth(const T& x)
{
return T(1) - 2 * x;
}
template <typename T>
-KFR_SINTRIN T sawtoothnorm(T x)
+KFR_SINTRIN T sawtoothnorm(const T& x)
{
return intrinsics::rawsawtooth(fract(x));
}
template <typename T>
-KFR_SINTRIN T sawtooth(T x)
+KFR_SINTRIN T sawtooth(const T& x)
{
- return intrinsics::sawtoothnorm(c_recip_pi<T, 1, 2> * x);
+ return intrinsics::sawtoothnorm(constants<T>::recip_pi_s(1, 2) * x);
}
template <typename T>
-KFR_SINTRIN T isawtoothnorm(T x)
+KFR_SINTRIN T isawtoothnorm(const T& x)
{
return T(-1) + 2 * fract(x + 0.5);
}
template <typename T>
-KFR_SINTRIN T isawtooth(T x)
+KFR_SINTRIN T isawtooth(const T& x)
{
- return intrinsics::isawtoothnorm(c_recip_pi<T, 1, 2> * x);
+ return intrinsics::isawtoothnorm(constants<T>::recip_pi_s(1, 2) * x);
}
template <typename T>
-KFR_SINTRIN T rawtriangle(T x)
+KFR_SINTRIN T rawtriangle(const T& x)
{
return 1 - abs(4 * x - 2);
}
template <typename T>
-KFR_SINTRIN T trianglenorm(T x)
+KFR_SINTRIN T trianglenorm(const T& x)
{
return intrinsics::rawtriangle(fract(x + 0.25));
}
template <typename T>
-KFR_SINTRIN T triangle(T x)
+KFR_SINTRIN T triangle(const T& x)
{
- return intrinsics::trianglenorm(c_recip_pi<T, 1, 2> * x);
+ return intrinsics::trianglenorm(constants<T>::recip_pi_s(1, 2) * x);
}
}
KFR_I_FN(rawsine)
diff --git a/include/kfr/dsp/sample_rate_conversion.hpp b/include/kfr/dsp/sample_rate_conversion.hpp
@@ -297,13 +297,13 @@ struct expression_downsample<4, offset, E> : expression<E>
}
template <typename E1, size_t offset = 0>
-CMT_INLINE internal::expression_downsample<2, offset, E1> downsample2(E1&& e1, csize_t<offset> = csize<0>)
+CMT_INLINE internal::expression_downsample<2, offset, E1> downsample2(E1&& e1, csize_t<offset> = csize_t<0>())
{
return internal::expression_downsample<2, offset, E1>(std::forward<E1>(e1));
}
template <typename E1, size_t offset = 0>
-CMT_INLINE internal::expression_downsample<4, offset, E1> downsample4(E1&& e1, csize_t<offset> = csize<0>)
+CMT_INLINE internal::expression_downsample<4, offset, E1> downsample4(E1&& e1, csize_t<offset> = csize_t<0>())
{
return internal::expression_downsample<4, offset, E1>(std::forward<E1>(e1));
}
diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp
@@ -37,28 +37,28 @@ using sample_rate_t = double;
namespace intrinsics
{
template <typename T, typename TF = flt_type<T>>
-KFR_SINTRIN TF amp_to_dB(T amp)
+KFR_SINTRIN TF amp_to_dB(const T& amp)
{
return log(static_cast<TF>(amp)) * subtype<TF>(8.6858896380650365530225783783322);
// return T( 20.0 ) * log10( level );
}
template <typename T, typename TF = flt_type<T>>
-KFR_SINTRIN TF dB_to_amp(T dB)
+KFR_SINTRIN TF dB_to_amp(const T& dB)
{
return exp(dB * subtype<TF>(0.11512925464970228420089957273422));
// return exp10( dB / 20 );
}
template <typename T, typename TF = flt_type<T>>
-KFR_SINTRIN TF amp_to_dB(T amp, T offset)
+KFR_SINTRIN TF amp_to_dB(const T& amp, const T& offset)
{
return log_fmadd(amp, subtype<TF>(8.6858896380650365530225783783322), offset);
// return T( 20.0 ) * log10( level );
}
template <typename T, typename TF = flt_type<T>>
-KFR_SINTRIN TF dB_to_amp(T dB, T offset)
+KFR_SINTRIN TF dB_to_amp(const T& dB, const T& offset)
{
auto offs = -subtype<TF>(0.11512925464970228420089957273422) * offset;
return exp_fmadd(dB, subtype<TF>(0.11512925464970228420089957273422), offs);
@@ -66,13 +66,13 @@ KFR_SINTRIN TF dB_to_amp(T dB, T offset)
}
template <typename T, typename Tout = flt_type<T>>
-KFR_SINTRIN Tout power_to_dB(T x)
+KFR_SINTRIN Tout power_to_dB(const T& x)
{
return log(x) * (10 * c_recip_log_10<Tout>);
}
template <typename T, typename Tout = flt_type<T>>
-KFR_SINTRIN Tout dB_to_power(T x)
+KFR_SINTRIN Tout dB_to_power(const T& x)
{
if (x == -c_infinity<Tout>)
return 0.0;
@@ -81,7 +81,7 @@ KFR_SINTRIN Tout dB_to_power(T x)
}
template <typename T, typename TF = flt_type<T>>
-KFR_SINTRIN TF note_to_hertz(T note)
+KFR_SINTRIN TF note_to_hertz(const T& note)
{
const subtype<TF> offset = 2.1011784386926213177653145771814;
@@ -89,7 +89,7 @@ KFR_SINTRIN TF note_to_hertz(T note)
}
template <typename T, typename TF = flt_type<T>>
-KFR_SINTRIN TF hertz_to_note(T hertz)
+KFR_SINTRIN TF hertz_to_note(const T& hertz)
{
const subtype<TF> offset = -36.376316562295915248836189714583;
@@ -97,7 +97,7 @@ KFR_SINTRIN TF hertz_to_note(T hertz)
}
template <typename T1, typename T2, typename T3, typename Tc = flt_type<common_type<T1, T2, T3, f32>>>
-KFR_SINTRIN Tc note_to_hertz(T1 note, T2 tunenote, T3 tunehertz)
+KFR_SINTRIN Tc note_to_hertz(const T1& note, const T2& tunenote, const T3& tunehertz)
{
const Tc offset = log(tunehertz) - tunenote * subtype<Tc>(0.05776226504666210911810267678818);
@@ -105,7 +105,7 @@ KFR_SINTRIN Tc note_to_hertz(T1 note, T2 tunenote, T3 tunehertz)
}
template <typename T1, typename T2, typename T3, typename Tc = flt_type<common_type<T1, T2, T3, f32>>>
-KFR_SINTRIN Tc hertz_to_note(T1 hertz, T2 tunenote, T3 tunehertz)
+KFR_SINTRIN Tc hertz_to_note(const T1& hertz, const T2& tunenote, const T3& tunehertz)
{
const Tc offset = tunenote - log(tunehertz) * subtype<Tc>(17.312340490667560888319096172023);
diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp
@@ -604,10 +604,10 @@ CMT_NOINLINE expression_pointer<T> window(size_t size, window_type type, identit
ctype_t<T> = ctype_t<T>())
{
return cswitch(
- cvals<window_type, window_type::rectangular, window_type::triangular, window_type::bartlett,
- window_type::cosine, window_type::hann, window_type::bartlett_hann, window_type::hamming,
- window_type::bohman, window_type::blackman, window_type::blackman_harris, window_type::kaiser,
- window_type::flattop, window_type::gaussian, window_type::lanczos>,
+ cvals_t<window_type, window_type::rectangular, window_type::triangular, window_type::bartlett,
+ window_type::cosine, window_type::hann, window_type::bartlett_hann, window_type::hamming,
+ window_type::bohman, window_type::blackman, window_type::blackman_harris, window_type::kaiser,
+ window_type::flattop, window_type::gaussian, window_type::lanczos>(),
type,
[=](auto win) {
constexpr window_type window = val_of(decltype(win)());
diff --git a/include/kfr/io/audiofile.hpp b/include/kfr/io/audiofile.hpp
@@ -44,7 +44,8 @@ void write_interleaved(E1&& dest, const univector2d<Tin, Tag1, Tag2>& src)
}
else if (channels == 2)
{
- process(std::forward<E1>(dest), pack(src[0], src[1]), 0, infinite_size, nullptr, nullptr, csize<2>);
+ process(std::forward<E1>(dest), pack(src[0], src[1]), 0, infinite_size, nullptr, nullptr,
+ csize_t<2>());
}
else
{
@@ -89,7 +90,8 @@ constexpr range<fmax> audio_range<f64>()
inline size_t get_audiobitdepth(audiodatatype type)
{
- return (size_t[]){ 0, 16, 24, 24, 32, 32, 64 }[static_cast<int>(type)];
+ constexpr size_t bits[]{ 0, 16, 24, 24, 32, 32, 64 };
+ return static_cast<size_t>(type) < 7 ? bits[static_cast<size_t>(type)] : 0;
}
template <typename T>
@@ -130,6 +132,24 @@ static constexpr u32 FourCC(const char (&ch)[5])
return u32(u8(ch[0])) | u32(u8(ch[1])) << 8 | u32(u8(ch[2])) << 16 | u32(u8(ch[3])) << 24;
}
+constexpr u32 cWAVE_FORMAT_PCM = 1;
+constexpr u32 cWAVE_FORMAT_IEEE = 3;
+
+constexpr u32 ccRIFF = FourCC("RIFF");
+constexpr u32 ccWAVE = FourCC("WAVE");
+constexpr u32 ccfmt = FourCC("fmt ");
+constexpr u32 ccdata = FourCC("data");
+
+constexpr u32 ccFORM = FourCC("FORM");
+constexpr u32 ccAIFF = FourCC("AIFF");
+constexpr u32 ccAIFC = FourCC("AIFC");
+constexpr u32 ccCOMM = FourCC("COMM");
+constexpr u32 ccSSND = FourCC("SSND");
+constexpr u32 ccNONE = FourCC("NONE");
+constexpr u32 ccsowt = FourCC("sowt");
+
+CMT_PRAGMA_PACK_PUSH_1
+
struct WAV_FMT
{
i32 fId; // 'fmt '
@@ -140,20 +160,20 @@ struct WAV_FMT
i32 nAvgBytesPerSec;
i16 numBlockAlingn;
i16 numBitsPerSample;
-} __attribute__((packed));
+} CMT_GNU_PACKED;
struct WAV_DATA
{
i32 dId; // 'data' or 'fact'
i32 dLen;
u8 data[1];
-} __attribute__((packed));
+} CMT_GNU_PACKED;
struct WAV_DATA_HDR
{
i32 dId; // 'data' or 'fact'
i32 dLen;
-} __attribute__((packed));
+} CMT_GNU_PACKED;
struct AIFF_FMT
{
@@ -164,53 +184,39 @@ struct AIFF_FMT
i16 bitsPerSample;
f80 sampleRate;
i32 compression;
-} __attribute__((packed));
+} CMT_GNU_PACKED;
struct AIFF_DATA
{
i32 chunkID;
i32 chunkLen;
u32 offset;
-} __attribute__((packed));
-
-constexpr u32 cWAVE_FORMAT_PCM = 1;
-constexpr u32 cWAVE_FORMAT_IEEE = 3;
-
-constexpr u32 ccRIFF = FourCC("RIFF");
-constexpr u32 ccWAVE = FourCC("WAVE");
-constexpr u32 ccfmt = FourCC("fmt ");
-constexpr u32 ccdata = FourCC("data");
-
-constexpr u32 ccFORM = FourCC("FORM");
-constexpr u32 ccAIFF = FourCC("AIFF");
-constexpr u32 ccAIFC = FourCC("AIFC");
-constexpr u32 ccCOMM = FourCC("COMM");
-constexpr u32 ccSSND = FourCC("SSND");
-constexpr u32 ccNONE = FourCC("NONE");
-constexpr u32 ccsowt = FourCC("sowt");
+} CMT_GNU_PACKED;
struct RIFF_HDR
{
i32 riffID; // 'RIFF' or 'COMM'
i32 fileLen;
i32 formatID; // 'WAVE' or 'AIFF'
-} __attribute__((packed));
+} CMT_GNU_PACKED;
struct WAV_HEADER
{
RIFF_HDR riff;
WAV_FMT fmt;
WAV_DATA_HDR data;
-
-} __attribute__((packed));
+} CMT_GNU_PACKED;
struct CHUNK_HDR
{
i32 chunkID;
i32 chunkLen;
-} __attribute__((packed));
+} CMT_GNU_PACKED;
+
+CMT_PRAGMA_PACK_POP
-static bool audio_test_wav(const array_ref<u8>& rawbytes)
+template <size_t = 0>
+bool audio_test_wav(const array_ref<u8>& rawbytes)
{
if (rawbytes.size() < sizeof(RIFF_HDR))
{
@@ -228,7 +234,8 @@ static bool audio_test_wav(const array_ref<u8>& rawbytes)
return true;
}
-static bool audio_test_aiff(const array_ref<u8>& rawbytes)
+template <size_t = 0>
+bool audio_test_aiff(const array_ref<u8>& rawbytes)
{
if (rawbytes.size() < sizeof(RIFF_HDR))
{
@@ -255,7 +262,8 @@ enum class file_status
unsupported_bit_format
};
-static file_status audio_info_wav(audioformat& info, const array_ref<u8>& rawbytes)
+template <size_t = 0>
+file_status audio_info_wav(audioformat& info, const array_ref<u8>& rawbytes)
{
const CHUNK_HDR* chunk = ptr_cast<CHUNK_HDR>(rawbytes.data() + 12);
const void* end = ptr_cast<char>(rawbytes.end());
@@ -324,7 +332,8 @@ static file_status audio_info_wav(audioformat& info, const array_ref<u8>& rawbyt
return file_status::ok;
}
-static file_status audio_info(audioformat& info, const array_ref<u8>& file_bytes)
+template <size_t = 0>
+file_status audio_info(audioformat& info, const array_ref<u8>& file_bytes)
{
if (audio_test_wav(file_bytes))
return audio_info_wav(info, file_bytes);
diff --git a/include/kfr/io/python_plot.hpp b/include/kfr/io/python_plot.hpp
@@ -40,9 +40,9 @@ namespace kfr
{
namespace internal
{
-#pragma clang diagnostic push
+CMT_PRAGMA_GNU(GCC diagnostic push)
#if CMT_HAS_WARNING("-Wdeprecated-declarations")
-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wdeprecated-declarations")
#endif
template <int = 0>
@@ -66,7 +66,7 @@ void python(const std::string& name, const std::string& code)
fclose(f);
std::system(("python \"" + filename + "\"").c_str());
}
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
}
inline std::string concat_args() { return {}; }
diff --git a/include/kfr/io/tostring.hpp b/include/kfr/io/tostring.hpp
@@ -111,6 +111,7 @@ inline std::string array_to_string(const kfr::complex<T>* source, size_t N)
template <typename T>
struct representation<kfr::complex<T>>
{
+ using type = std::string;
static std::string get(const kfr::complex<T>& value)
{
return as_string(value.real()) + " + " + as_string(value.imag()) + "j";
@@ -120,12 +121,14 @@ struct representation<kfr::complex<T>>
template <>
struct representation<kfr::cpu_t>
{
+ using type = std::string;
static std::string get(kfr::cpu_t value) { return kfr::cpu_name(value); }
};
template <typename T, size_t N>
struct representation<kfr::vec<T, N>>
{
+ using type = std::string;
static std::string get(const kfr::vec<T, N>& value)
{
return details::array_to_string(value.data(), value.size());
@@ -135,6 +138,7 @@ struct representation<kfr::vec<T, N>>
template <typename T, size_t Tag>
struct representation<kfr::univector<T, Tag>>
{
+ using type = std::string;
static std::string get(const kfr::univector<T, Tag>& value)
{
return details::array_to_string(value.data(), value.size());
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -67,28 +67,33 @@ else ()
set(EMULATOR "")
endif ()
+#get_cmake_property(_variableNames VARIABLES)
+#foreach (_variableName ${_variableNames})
+# message(STATUS "${_variableName}=${${_variableName}}")
+#endforeach()
+
if (NOT IOS)
enable_testing()
add_test(NAME base_test
- COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/tests/base_test)
+ COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/bin/base_test)
add_test(NAME intrinsic_test
- COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/tests/intrinsic_test)
+ COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/bin/intrinsic_test)
add_test(NAME dsp_test
- COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/tests/dsp_test)
+ COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/bin/dsp_test)
if (MPFR_FOUND)
add_test(NAME transcendental_test
- COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/tests/transcendental_test)
+ COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/bin/transcendental_test)
endif ()
add_test(NAME complex_test
- COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/tests/complex_test)
+ COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/bin/complex_test)
add_test(NAME expression_test
- COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/tests/expression_test)
+ COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/bin/expression_test)
if (NOT ARM)
add_test(NAME multiarch
- COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/tests/multiarch)
+ COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/bin/multiarch)
endif ()
add_test(NAME dft_test
- COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/tests/dft_test)
+ COMMAND ${EMULATOR} ${PROJECT_BINARY_DIR}/bin/dft_test)
endif ()
diff --git a/tests/base_test.cpp b/tests/base_test.cpp
@@ -76,16 +76,17 @@ TEST(test_basic)
CHECK(even(numbers2) == vec<int, 4>{ 100, 102, 104, 106 });
// * The following command pairs are equivalent:
- CHECK(permute(numbers1, elements<0, 2, 1, 3, 4, 6, 5, 7>) == vec<int, 8>{ 0, 2, 1, 3, 4, 6, 5, 7 });
- CHECK(permute(numbers1, elements<0, 2, 1, 3>) == vec<int, 8>{ 0, 2, 1, 3, 4, 6, 5, 7 });
+ CHECK(permute(numbers1, elements_t<0, 2, 1, 3, 4, 6, 5, 7>()) == vec<int, 8>{ 0, 2, 1, 3, 4, 6, 5, 7 });
+ CHECK(permute(numbers1, elements_t<0, 2, 1, 3>()) == vec<int, 8>{ 0, 2, 1, 3, 4, 6, 5, 7 });
- CHECK(shuffle(numbers1, numbers2, elements<0, 8, 2, 10, 4, 12, 6, 14>) ==
+ CHECK(shuffle(numbers1, numbers2, elements_t<0, 8, 2, 10, 4, 12, 6, 14>()) ==
vec<int, 8>{ 0, 100, 2, 102, 4, 104, 6, 106 });
- CHECK(shuffle(numbers1, numbers2, elements<0, 8>) == vec<int, 8>{ 0, 100, 2, 102, 4, 104, 6, 106 });
+ CHECK(shuffle(numbers1, numbers2, elements_t<0, 8>()) == vec<int, 8>{ 0, 100, 2, 102, 4, 104, 6, 106 });
- CHECK(blend(numbers1, numbers2, elements<0, 1, 1, 0, 1, 1, 0, 1>) ==
+ CHECK(blend(numbers1, numbers2, elements_t<0, 1, 1, 0, 1, 1, 0, 1>()) ==
+ vec<int, 8>{ 0, 101, 102, 3, 104, 105, 6, 107 });
+ CHECK(blend(numbers1, numbers2, elements_t<0, 1, 1>()) ==
vec<int, 8>{ 0, 101, 102, 3, 104, 105, 6, 107 });
- CHECK(blend(numbers1, numbers2, elements<0, 1, 1>) == vec<int, 8>{ 0, 101, 102, 3, 104, 105, 6, 107 });
// * Transpose matrix:
const auto sixteen = enumerate<float, 16>();
@@ -154,6 +155,9 @@ TEST(vec_zerovector)
TEST(vec_allonesvector)
{
+ CHECK(bitcast<u32>(constants<f32>::allones()) == 0xFFFFFFFFu);
+ CHECK(bitcast<u64>(constants<f64>::allones()) == 0xFFFFFFFFFFFFFFFFull);
+
CHECK(~allonesvector<f32, 3>() == f32x3{ 0, 0, 0 });
CHECK(allonesvector<i16, 3>() == i16x3{ -1, -1, -1 });
CHECK(allonesvector<u8, 3>() == u8x3{ 255, 255, 255 });
@@ -221,6 +225,10 @@ TEST(vec_is_convertible)
static_assert(std::is_convertible<vec<complex<float>, 2>, vec<complex<double>, 2>>::value, "");
static_assert(std::is_convertible<vec<vec<float, 4>, 2>, vec<vec<double, 4>, 2>>::value, "");
+ testo::assert_is_same<i32x4, common_type<i32x4>>();
+ testo::assert_is_same<u32x4, common_type<i32x4, u32x4>>();
+ testo::assert_is_same<f64x4, common_type<i32x4, u32x4, f64x4>>();
+
CHECK(static_cast<f32x4>(4.f) == f32x4{ 4.f, 4.f, 4.f, 4.f });
CHECK(static_cast<f64x8>(4.f) == f64x8{ 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0 });
CHECK(static_cast<u8x3>(4.f) == u8x3{ 4, 4, 4 });
@@ -282,7 +290,7 @@ TEST(test_stat)
}
}
-int main(int argc, char** argv)
+int main()
{
println(library_version());
return testo::run_all("", true);
diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp
@@ -9,7 +9,6 @@
#include <kfr/io.hpp>
using namespace kfr;
-using testo::assert_is_same;
TEST(complex_vector)
{
@@ -67,7 +66,7 @@ TEST(complex_math)
CHECK(a - b == make_vector(c32{ -2, -2 }));
CHECK(a * b == make_vector(c32{ -5, 10 }));
CHECK(a * 2 == make_vector(c32{ 2, 4 }));
- CHECK(a / b == make_vector(c32{ 0.44, 0.08 }));
+ CHECK(a / b == make_vector(c32{ 0.44f, 0.08f }));
CHECK(-a == make_vector(c32{ -1, -2 }));
CHECK(real(a) == make_vector(1.f));
@@ -86,26 +85,26 @@ TEST(complex_math)
testo::epsilon<f32>() *= 5;
testo::epsilon<f64>() *= 5;
- CHECK(csin(c32{ 1.f, 1.f }) == c32{ 1.2984575814159773, 0.634963914784736 });
- CHECK(ccos(c32{ 1.f, 1.f }) == c32{ 0.8337300251311489, -0.9888977057628651 });
- CHECK(csinh(c32{ 1.f, 1.f }) == c32{ 0.634963914784736, 1.2984575814159773 });
- CHECK(ccosh(c32{ 1.f, 1.f }) == c32{ 0.8337300251311489, 0.9888977057628651 });
+ CHECK(csin(c32{ 1.f, 1.f }) == c32{ 1.2984575814159773f, 0.634963914784736f });
+ CHECK(ccos(c32{ 1.f, 1.f }) == c32{ 0.8337300251311489f, -0.9888977057628651f });
+ CHECK(csinh(c32{ 1.f, 1.f }) == c32{ 0.634963914784736f, 1.2984575814159773f });
+ CHECK(ccosh(c32{ 1.f, 1.f }) == c32{ 0.8337300251311489f, 0.9888977057628651f });
- CHECK(clog(c32{ 1.f, 1.f }) == c32{ 0.34657359027997264, 0.7853981633974483 });
- CHECK(clog2(c32{ 1.f, 1.f }) == c32{ 0.5, 1.1330900354567983 });
- CHECK(clog10(c32{ 1.f, 1.f }) == c32{ 0.15051499783199057, 0.3410940884604603 });
+ CHECK(clog(c32{ 1.f, 1.f }) == c32{ 0.34657359027997264f, 0.7853981633974483f });
+ CHECK(clog2(c32{ 1.f, 1.f }) == c32{ 0.5f, 1.1330900354567983f });
+ CHECK(clog10(c32{ 1.f, 1.f }) == c32{ 0.15051499783199057f, 0.3410940884604603f });
- CHECK(cexp(c32{ 1.f, 1.f }) == c32{ 1.4686939399158849, 2.2873552871788423 });
- CHECK(cexp2(c32{ 1.f, 1.f }) == c32{ 1.5384778027279442, 1.2779225526272695 });
- CHECK(cexp10(c32{ 1.f, 1.f }) == c32{ -6.682015101903131, 7.439803369574931 });
+ CHECK(cexp(c32{ 1.f, 1.f }) == c32{ 1.4686939399158849f, 2.2873552871788423f });
+ CHECK(cexp2(c32{ 1.f, 1.f }) == c32{ 1.5384778027279442f, 1.2779225526272695f });
+ CHECK(cexp10(c32{ 1.f, 1.f }) == c32{ -6.682015101903131f, 7.439803369574931f });
#ifdef KFR_NATIVE_F64
- CHECK(csin(c64{ 1.f, 1.f }) == c64{ 1.2984575814159773, 0.634963914784736 });
- CHECK(ccos(c64{ 1.f, 1.f }) == c64{ 0.8337300251311489, -0.9888977057628651 });
- CHECK(csinh(c64{ 1.f, 1.f }) == c64{ 0.634963914784736, 1.2984575814159773 });
- CHECK(ccosh(c64{ 1.f, 1.f }) == c64{ 0.8337300251311489, 0.9888977057628651 });
- CHECK(clog(c64{ 1.f, 1.f }) == c64{ 0.34657359027997264, 0.7853981633974483 });
- CHECK(cexp(c64{ 1.f, 1.f }) == c64{ 1.4686939399158849, 2.2873552871788423 });
+ CHECK(csin(c64{ 1.0, 1.0 }) == c64{ 1.2984575814159773, 0.634963914784736 });
+ CHECK(ccos(c64{ 1.0, 1.0 }) == c64{ 0.8337300251311489, -0.9888977057628651 });
+ CHECK(csinh(c64{ 1.0, 1.0 }) == c64{ 0.634963914784736, 1.2984575814159773 });
+ CHECK(ccosh(c64{ 1.0, 1.0 }) == c64{ 0.8337300251311489, 0.9888977057628651 });
+ CHECK(clog(c64{ 1.0, 1.0 }) == c64{ 0.34657359027997264, 0.7853981633974483 });
+ CHECK(cexp(c64{ 1.0, 1.0 }) == c64{ 1.4686939399158849, 2.2873552871788423 });
#endif
}
@@ -161,19 +160,18 @@ TEST(complex_function_expressions)
CHECK(uv3[1] == 2.f);
CHECK(uv3[2] == 8.f);
CHECK(uv3[3] == 18.f);
-
- assert_is_same<c32, value_type_of<decltype(uv2)>>();
- assert_is_same<f32, value_type_of<decltype(uv3)>>();
- assert_is_same<f32, value_type_of<decltype(real(uv2))>>();
+ testo::assert_is_same<c32, value_type_of<decltype(uv2)>>();
+ testo::assert_is_same<f32, value_type_of<decltype(uv3)>>();
+ testo::assert_is_same<f32, value_type_of<decltype(real(uv2))>>();
}
TEST(static_tests)
{
#ifdef CMT_ARCH_SSE2
- static_assert(vector_width<f32, cpu_t::sse2> == 4, "");
- static_assert(vector_width<c32, cpu_t::sse2> == 2, "");
- static_assert(vector_width<i32, cpu_t::sse2> == 4, "");
- static_assert(vector_width<complex<i32>, cpu_t::sse2> == 2, "");
+ static_assert(platform<f32, cpu_t::sse2>::vector_width == 4, "");
+ static_assert(platform<c32, cpu_t::sse2>::vector_width == 2, "");
+ static_assert(platform<i32, cpu_t::sse2>::vector_width == 4, "");
+ static_assert(platform<complex<i32>, cpu_t::sse2>::vector_width == 2, "");
#endif
static_assert(is_numeric<vec<complex<float>, 4>>::value, "");
@@ -184,26 +182,26 @@ TEST(static_tests)
static_assert(vec<c32, 4>::size() == 4, "");
static_assert(vec<f32, 4>::scalar_size() == 4, "");
static_assert(vec<c32, 4>::scalar_size() == 8, "");
- assert_is_same<subtype<complex<i32>>, i32>();
- assert_is_same<vec<c32, 4>::value_type, c32>();
- assert_is_same<vec<c32, 4>::scalar_type, f32>();
- assert_is_same<vec<f32, 4>::value_type, f32>();
- assert_is_same<vec<f32, 4>::scalar_type, f32>();
- assert_is_same<vec<c32, 1>, decltype(make_vector(c32{ 0, 0 }))>();
- assert_is_same<vec<c32, 2>, decltype(make_vector(c32{ 0, 0 }, 4))>();
- assert_is_same<ftype<complex<i32>>, complex<f32>>();
- assert_is_same<ftype<complex<i64>>, complex<f64>>();
- assert_is_same<ftype<vec<complex<i32>, 4>>, vec<complex<f32>, 4>>();
- assert_is_same<ftype<vec<complex<i64>, 8>>, vec<complex<f64>, 8>>();
-
- assert_is_same<kfr::internal::arg<int>, kfr::internal::expression_scalar<int, 1>>();
- assert_is_same<kfr::internal::arg<complex<int>>,
- kfr::internal::expression_scalar<kfr::complex<int>, 1>>();
-
- assert_is_same<common_type<complex<int>, double>, complex<double>>();
+ testo::assert_is_same<subtype<complex<i32>>, i32>();
+ testo::assert_is_same<vec<c32, 4>::value_type, c32>();
+ testo::assert_is_same<vec<c32, 4>::scalar_type, f32>();
+ testo::assert_is_same<vec<f32, 4>::value_type, f32>();
+ testo::assert_is_same<vec<f32, 4>::scalar_type, f32>();
+ testo::assert_is_same<vec<c32, 1>, decltype(make_vector(c32{ 0, 0 }))>();
+ testo::assert_is_same<vec<c32, 2>, decltype(make_vector(c32{ 0, 0 }, 4))>();
+ testo::assert_is_same<ftype<complex<i32>>, complex<f32>>();
+ testo::assert_is_same<ftype<complex<i64>>, complex<f64>>();
+ testo::assert_is_same<ftype<vec<complex<i32>, 4>>, vec<complex<f32>, 4>>();
+ testo::assert_is_same<ftype<vec<complex<i64>, 8>>, vec<complex<f64>, 8>>();
+
+ testo::assert_is_same<kfr::internal::arg<int>, kfr::internal::expression_scalar<int, 1>>();
+ testo::assert_is_same<kfr::internal::arg<complex<int>>,
+ kfr::internal::expression_scalar<kfr::complex<int>, 1>>();
+
+ testo::assert_is_same<common_type<complex<int>, double>, complex<double>>();
}
-int main(int argc, char** argv)
+int main()
{
println(library_version());
diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp
@@ -92,7 +92,7 @@ TEST(fft_accuracy)
});
}
-int main(int argc, char** argv)
+int main()
{
println(library_version());
diff --git a/tests/dsp_test.cpp b/tests/dsp_test.cpp
@@ -6,7 +6,6 @@
#include "testo/testo.hpp"
#include <kfr/base.hpp>
-#include <kfr/dft.hpp>
#include <kfr/dsp.hpp>
#include <kfr/io.hpp>
@@ -21,7 +20,7 @@ TEST(delay)
CHECK(v2[2] == 101);
CHECK(v2[19] == 118);
- const univector<float, 33> v3 = delay(v1, csize<3>);
+ const univector<float, 33> v3 = delay(v1, csize_t<3>());
CHECK(v3[0] == 0);
CHECK(v3[1] == 0);
CHECK(v3[2] == 0);
@@ -34,16 +33,16 @@ TEST(fracdelay)
{
univector<double, 5> a({ 1, 2, 3, 4, 5 });
univector<double, 5> b = fracdelay(a, 0.5);
- CHECK(rms(b - univector<double>({ 0.5, 1.5, 2.5, 3.5, 4.5 })) < c_epsilon<double> * 5);
+ CHECK(rms(b - univector<double>({ 0.5, 1.5, 2.5, 3.5, 4.5 })) < constants<double>::epsilon * 5);
b = fracdelay(a, 0.1);
- CHECK(rms(b - univector<double>({ 0.9, 1.9, 2.9, 3.9, 4.9 })) < c_epsilon<double> * 5);
+ CHECK(rms(b - univector<double>({ 0.9, 1.9, 2.9, 3.9, 4.9 })) < constants<double>::epsilon * 5);
b = fracdelay(a, 0.0);
- CHECK(rms(b - univector<double>({ 1, 2, 3, 4, 5 })) < c_epsilon<double> * 5);
+ CHECK(rms(b - univector<double>({ 1, 2, 3, 4, 5 })) < constants<double>::epsilon * 5);
b = fracdelay(a, 1.0);
- CHECK(rms(b - univector<double>({ 0, 1, 2, 3, 4 })) < c_epsilon<double> * 5);
+ CHECK(rms(b - univector<double>({ 0, 1, 2, 3, 4 })) < constants<double>::epsilon * 5);
}
TEST(mixdown)
@@ -80,7 +79,7 @@ TEST(phasor)
CHECK(rms(v1 - v2) < 1.e-5);
}
-int main(int argc, char** argv)
+int main()
{
println(library_version());
return testo::run_all("", true);
diff --git a/tests/empty_test.cpp b/tests/empty_test.cpp
@@ -2,4 +2,4 @@
using namespace kfr;
-int main(int argc, char** argv) { return 0; }
+int main() {}
diff --git a/tests/expression_test.cpp b/tests/expression_test.cpp
@@ -166,7 +166,7 @@ TEST(partition)
}
}
-int main(int argc, char** argv)
+int main()
{
println(library_version());
diff --git a/tests/intrinsic_test.cpp b/tests/intrinsic_test.cpp
@@ -70,7 +70,7 @@ inline T ref_abs(T x)
template <typename T>
bool builtin_add_overflow(T x, T y, T* r)
{
-#if __has_builtin(__builtin_add_overflow)
+#if CMT_HAS_BUILTIN(__builtin_add_overflow) || defined CMT_COMPILER_GCC
return __builtin_add_overflow(x, y, r);
#else
*r = x + y;
@@ -80,17 +80,27 @@ bool builtin_add_overflow(T x, T y, T* r)
template <>
bool builtin_add_overflow<u64>(u64 x, u64 y, u64* r)
{
+#if CMT_HAS_BUILTIN(__builtin_uaddll_overflow) || defined CMT_COMPILER_GCC
return __builtin_uaddll_overflow(x, y, reinterpret_cast<unsigned long long*>(r));
+#else // portable fallback used when the builtin is not available
+ *r = x + y; // store the (possibly wrapped) sum
+ return x > 0xFFFFFFFFFFFFFFFFull - y; // true when x + y would exceed the 64-bit unsigned maximum
+#endif
}
template <>
bool builtin_add_overflow<i64>(i64 x, i64 y, i64* r)
{
+#if CMT_HAS_BUILTIN(__builtin_saddll_overflow) || defined CMT_COMPILER_GCC
return __builtin_saddll_overflow(x, y, reinterpret_cast<long long*>(r));
+#else // portable fallback used when the builtin is not available
+ *r = static_cast<i64>(static_cast<u64>(x) + static_cast<u64>(y)); // add as unsigned: signed overflow is UB
+ return !((x ^ y) & 0x8000000000000000ull) && ((*r ^ x) & 0x8000000000000000ull); // same-sign operands, result sign flipped
+#endif
}
template <typename T>
bool builtin_sub_overflow(T x, T y, T* r)
{
-#if __has_builtin(__builtin_sub_overflow)
+#if CMT_HAS_BUILTIN(__builtin_sub_overflow) || defined CMT_COMPILER_GCC
return __builtin_sub_overflow(x, y, r);
#else
*r = x - y;
@@ -100,12 +110,22 @@ bool builtin_sub_overflow(T x, T y, T* r)
template <>
bool builtin_sub_overflow<u64>(u64 x, u64 y, u64* r)
{
+#if CMT_HAS_BUILTIN(__builtin_usubll_overflow) || defined CMT_COMPILER_GCC
return __builtin_usubll_overflow(x, y, reinterpret_cast<unsigned long long*>(r));
+#else // portable fallback used when the builtin is not available
+ *r = x - y; // store the (possibly wrapped) difference
+ return x < y; // a borrow happens exactly when y is larger than x
+#endif
}
template <>
bool builtin_sub_overflow<i64>(i64 x, i64 y, i64* r)
{
+#if CMT_HAS_BUILTIN(__builtin_ssubll_overflow) || defined CMT_COMPILER_GCC
return __builtin_ssubll_overflow(x, y, reinterpret_cast<long long*>(r));
+#else // portable fallback used when the builtin is not available
+ *r = static_cast<i64>(static_cast<u64>(x) - static_cast<u64>(y)); // subtract as unsigned: signed overflow is UB
+ return ((x ^ y) & 0x8000000000000000ull) && ((*r ^ x) & 0x8000000000000000ull); // operand signs differ, result sign differs from x
+#endif
}
//#endif
template <typename T>
@@ -152,7 +172,7 @@ TEST(intrin_abs)
using T = type_of<decltype(type)>;
using Tsub = subtype<T>;
const T x(value);
- CHECK(kfr::abs(x) == apply([](auto x) { return Tsub(std::abs(x)); }, x));
+ CHECK(kfr::abs(x) == apply([](auto x) { return ref_abs(x); }, x));
});
}
@@ -389,4 +409,4 @@ TEST(intrin_math)
CHECK(kfr::note_to_hertz(pack(60)) == pack(fbase(261.6255653005986346778499935233)));
}
-int main(int argc, char** argv) { return testo::run_all("", false); }
+int main() { return testo::run_all("", false); }
diff --git a/tests/multiarch.cpp b/tests/multiarch.cpp
@@ -54,4 +54,4 @@ TEST(test_fir_avx)
}
}
-int main(int argc, char** argv) { return testo::run_all("", true); }
+int main() { return testo::run_all("", true); }
diff --git a/tests/testo/testo.hpp b/tests/testo/testo.hpp
@@ -99,7 +99,7 @@ struct comparison
comparison(L&& left, R&& right) : left(std::forward<L>(left)), right(std::forward<R>(right)) {}
- bool operator()() { return cmp(left, right); }
+ bool operator()() const { return cmp(left, right); }
};
template <typename Left, typename Right>
@@ -120,8 +120,8 @@ struct equality_comparer
bool operator()(const L& l, const R& r) const { return l == r; }
};
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wfloat-equal"
+CMT_PRAGMA_GNU(GCC diagnostic push)
+CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wfloat-equal")
template <typename T>
inline T& epsilon()
@@ -149,7 +149,7 @@ struct equality_comparer<long double, long double>
}
};
-#pragma clang diagnostic pop
+CMT_PRAGMA_GNU(GCC diagnostic pop)
template <typename L, typename R>
struct equality_comparer<L, R, void_t<enable_if<!compound_type_traits<L>::is_scalar>>>
@@ -179,7 +179,7 @@ struct cmp_eq
static const char* op() { return "=="; }
template <typename L, typename R>
- bool operator()(L&& left, R&& right)
+ bool operator()(L&& left, R&& right) const
{
equality_comparer<decay<L>, decay<R>> eq;
return eq(left, right);
@@ -191,7 +191,7 @@ struct cmp_ne
static const char* op() { return "!="; }
template <typename L, typename R>
- bool operator()(L&& left, R&& right)
+ bool operator()(L&& left, R&& right) const
{
return !cmp_eq()(left, right);
}
@@ -202,7 +202,7 @@ struct cmp_lt
static const char* op() { return "<"; }
template <typename L, typename R>
- bool operator()(L&& left, R&& right)
+ bool operator()(L&& left, R&& right) const
{
return left < right;
}
@@ -213,7 +213,7 @@ struct cmp_gt
static const char* op() { return ">"; }
template <typename L, typename R>
- bool operator()(L&& left, R&& right)
+ bool operator()(L&& left, R&& right) const
{
return left > right;
}
@@ -224,7 +224,7 @@ struct cmp_le
static const char* op() { return "<="; }
template <typename L, typename R>
- bool operator()(L&& left, R&& right)
+ bool operator()(L&& left, R&& right) const
{
return left <= right;
}
@@ -235,7 +235,7 @@ struct cmp_ge
static const char* op() { return ">="; }
template <typename L, typename R>
- bool operator()(L&& left, R&& right)
+ bool operator()(L&& left, R&& right) const
{
return left >= right;
}
@@ -394,14 +394,14 @@ struct test_case
}
template <typename Op, typename L, typename R>
- void check(comparison<Op, L, R> comparison, const char* expr)
+ void check(const comparison<Op, L, R>& comparison, const char* expr)
{
bool result = comparison();
check(result, as_string(comparison.left, " ", Op::op(), " ", comparison.right), expr);
}
template <typename L>
- void check(half_comparison<L> comparison, const char* expr)
+ void check(const half_comparison<L>& comparison, const char* expr)
{
bool result = comparison.left ? true : false;
check(result, as_string(comparison.left), expr);
diff --git a/tests/transcendental_test.cpp b/tests/transcendental_test.cpp
@@ -26,7 +26,8 @@ double ulps(T test, const mpfr::number& ref)
TEST(test_sin_cos)
{
- testo::matrix(named("type") = ctypes<f32, f64>, named("value") = make_range(0.0, +c_pi<f64, 2>, 0.05),
+ testo::matrix(named("type") = ctypes_t<f32, f64>(),
+ named("value") = make_range(0.0, +constants<f64>::pi * 2, 0.05),
[](auto type, double value) {
using T = type_of<decltype(type)>;
const T x(value);
@@ -37,7 +38,7 @@ TEST(test_sin_cos)
TEST(test_log)
{
- testo::matrix(named("type") = ctypes<f32, f64>, named("value") = make_range(0.0, 100.0, 0.5),
+ testo::matrix(named("type") = ctypes_t<f32, f64>(), named("value") = make_range(0.0, 100.0, 0.5),
[](auto type, double value) {
using T = type_of<decltype(type)>;
const T x(value);
@@ -47,7 +48,7 @@ TEST(test_log)
TEST(test_log2)
{
- testo::matrix(named("type") = ctypes<f32, f64>, named("value") = make_range(0.0, 100.0, 0.5),
+ testo::matrix(named("type") = ctypes_t<f32, f64>(), named("value") = make_range(0.0, 100.0, 0.5),
[](auto type, double value) {
using T = type_of<decltype(type)>;
const T x(value);
@@ -57,7 +58,7 @@ TEST(test_log2)
TEST(test_log10)
{
- testo::matrix(named("type") = ctypes<f32, f64>, named("value") = make_range(0.0, 100.0, 0.5),
+ testo::matrix(named("type") = ctypes_t<f32, f64>(), named("value") = make_range(0.0, 100.0, 0.5),
[](auto type, double value) {
using T = type_of<decltype(type)>;
const T x(value);
@@ -67,7 +68,7 @@ TEST(test_log10)
TEST(test_exp)
{
- testo::matrix(named("type") = ctypes<f32, f64>, named("value") = make_range(-10, +10, 0.05),
+ testo::matrix(named("type") = ctypes_t<f32, f64>(), named("value") = make_range(-10, +10, 0.05),
[](auto type, double value) {
using T = type_of<decltype(type)>;
const T x(value);
@@ -77,7 +78,7 @@ TEST(test_exp)
TEST(test_exp2)
{
- testo::matrix(named("type") = ctypes<f32, f64>, named("value") = make_range(-10, +10, 0.05),
+ testo::matrix(named("type") = ctypes_t<f32, f64>(), named("value") = make_range(-10, +10, 0.05),
[](auto type, double value) {
using T = type_of<decltype(type)>;
const T x(value);
@@ -87,7 +88,7 @@ TEST(test_exp2)
TEST(test_exp10)
{
- testo::matrix(named("type") = ctypes<f32, f64>, named("value") = make_range(-10, +10, 0.05),
+ testo::matrix(named("type") = ctypes_t<f32, f64>(), named("value") = make_range(-10, +10, 0.05),
[](auto type, double value) {
using T = type_of<decltype(type)>;
const T x(value);
@@ -95,7 +96,7 @@ TEST(test_exp10)
});
}
-int main(int argc, char** argv)
+int main()
{
mpfr::scoped_precision p(128);
return testo::run_all("");