commit 631c3168538b69f23e34504bb2dee4da54f53fec
parent 3cd9c5ba89939cb310a200c52fc4fec07619a579
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date: Sun, 25 Nov 2018 20:51:12 +0300
Optimize final FFT butterflies
Diffstat:
3 files changed, 32 insertions(+), 37 deletions(-)
diff --git a/include/kfr/data/sincos.hpp b/include/kfr/data/sincos.hpp
@@ -31,8 +31,12 @@ namespace kfr
namespace data
{
+template <typename T> // primary template carries no data; the f32 and f64 specializations below hold the values
+constexpr T c_sin_table[65]; // 65 entries: sin(2*pi*k/256) for k = 0..64, i.e. a quarter period including both endpoints
+
// data generated by mpfr
-constexpr f32 c_sin_table_f32[64] = {
+template <>
+constexpr f32 c_sin_table<f32>[65] = {
/* sin(2*pi* 0/ 256) */ f32(0.0),
/* sin(2*pi* 1/ 256) */ f32(0.02454122852291228803173452945928292506547),
/* sin(2*pi* 2/ 256) */ f32(0.04906767432741801425495497694268265831475),
@@ -96,11 +100,13 @@ constexpr f32 c_sin_table_f32[64] = {
/* sin(2*pi* 60/ 256) */ f32(0.9951847266721968862448369531094799215755),
/* sin(2*pi* 61/ 256) */ f32(0.9972904566786902161355971401825678211717),
/* sin(2*pi* 62/ 256) */ f32(0.9987954562051723927147716047591006944432),
- /* sin(2*pi* 63/ 256) */ f32(0.9996988186962042201157656496661721968501)
+ /* sin(2*pi* 63/ 256) */ f32(0.9996988186962042201157656496661721968501),
+ /* sin(2*pi* 64/ 256) */ f32(1.0000000000000000000000000000000000000000)
};
// data generated by mpfr
-constexpr f64 c_sin_table_f64[64] = {
+template <>
+constexpr f64 c_sin_table<f64>[65] = {
/* sin(2*pi* 0/ 256) */ f64(0.0),
/* sin(2*pi* 1/ 256) */ f64(0.02454122852291228803173452945928292506547),
/* sin(2*pi* 2/ 256) */ f64(0.04906767432741801425495497694268265831475),
@@ -164,30 +170,16 @@ constexpr f64 c_sin_table_f64[64] = {
/* sin(2*pi* 60/ 256) */ f64(0.9951847266721968862448369531094799215755),
/* sin(2*pi* 61/ 256) */ f64(0.9972904566786902161355971401825678211717),
/* sin(2*pi* 62/ 256) */ f64(0.9987954562051723927147716047591006944432),
- /* sin(2*pi* 63/ 256) */ f64(0.9996988186962042201157656496661721968501)
+ /* sin(2*pi* 63/ 256) */ f64(0.9996988186962042201157656496661721968501),
+ /* sin(2*pi* 64/ 256) */ f64(1.0000000000000000000000000000000000000000)
};
-}
-template <typename T>
-constexpr inline T sin_using_table_256(size_t k);
+} // namespace data
-template <>
-constexpr inline f32 sin_using_table_256<f32>(size_t k)
-{
- return (k == 0 || k == 128) ? 0.f
- : (k == 64) ? 1.f
- : (k > 128) ? -sin_using_table_256<f32>(k - 128)
- : (k > 64) ? sin_using_table_256<f32>(128 - k)
- : data::c_sin_table_f32[k];
-}
-template <>
-constexpr inline f64 sin_using_table_256<f64>(size_t k)
+// Returns sin(2*pi*k/256) using the quarter-wave table data::c_sin_table<T>.
+// The magnitude is read at k % 128, mirrored around index 64 for the second
+// quarter of each half period. The sign is taken from k % 256 so that the
+// result stays periodic for k >= 256, as it was with the removed recursive
+// specializations.
+template <typename T>
+constexpr inline T sin_using_table_256(size_t k)
{
- return (k == 0 || k == 128) ? 0.0
- : (k == 64) ? 1.0
- : (k > 128) ? -sin_using_table_256<f64>(k - 128)
- : (k > 64) ? sin_using_table_256<f64>(128 - k)
- : data::c_sin_table_f64[k];
+ return (k % 256 > 128 ? -1 : +1) * data::c_sin_table<T>[k % 128 >= 64 ? 128 - k % 128 : k % 128];
}
template <typename T>
@@ -200,4 +192,4 @@ constexpr inline T cos_using_table(size_t size, size_t k)
{
return sin_using_table<T>(size, k + size / 4);
}
-}
+} // namespace kfr
diff --git a/include/kfr/dft/dft-src.cpp b/include/kfr/dft/dft-src.cpp
@@ -519,19 +519,21 @@ protected:
final_stage(csize<size>, 1, cbool<splitin>, out, in, twiddle);
}
- // KFR_INTRIN void final_stage(csize_t<32>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*,
- // const complex<T>*& twiddle)
- // {
- // radix4_pass(csize_t<32>(), invN, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(),
- // cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
- // }
- //
- // KFR_INTRIN void final_stage(csize_t<16>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*,
- // const complex<T>*& twiddle)
- // {
- // radix4_pass(csize_t<16>(), invN, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(),
- // cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle);
- // }
+ template <typename U = T, KFR_ENABLE_IF(is_same<U, float>::value)> // presumably SFINAE: overload enabled only when T is float (U defaults to T) - confirm against KFR_ENABLE_IF
+ KFR_INTRIN void final_stage(csize_t<32>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*, // size-32 final stage; the input pointer is unnamed, so it is never read here
+ const complex<T>*& twiddle)
+ {
+ radix4_pass(csize_t<32>(), invN, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(), // delegates the whole stage to a single radix4_pass call
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle); // `out` is passed for both pointer arguments
+ }
+
+ template <typename U = T, KFR_ENABLE_IF(is_same<U, float>::value)> // presumably SFINAE: overload enabled only when T is float (U defaults to T) - confirm against KFR_ENABLE_IF
+ KFR_INTRIN void final_stage(csize_t<16>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*, // size-16 final stage; the input pointer is unnamed, so it is never read here
+ const complex<T>*& twiddle)
+ {
+ radix4_pass(csize_t<16>(), invN, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(), // delegates the whole stage to a single radix4_pass call
+ cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle); // `out` is passed for both pointer arguments
+ }
KFR_INTRIN void final_stage(csize_t<8>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*,
const complex<T>*& twiddle)
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -30,6 +30,7 @@
#include "../base/memory.hpp"
#include "../base/read_write.hpp"
#include "../base/small_buffer.hpp"
+#include "../base/univector.hpp"
#include "../base/vec.hpp"
CMT_PRAGMA_GNU(GCC diagnostic push)