kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 631c3168538b69f23e34504bb2dee4da54f53fec
parent 3cd9c5ba89939cb310a200c52fc4fec07619a579
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Sun, 25 Nov 2018 20:51:12 +0300

Optimize final FFT butterflies

Diffstat:
M include/kfr/data/sincos.hpp | 40 ++++++++++++++++------------------------
M include/kfr/dft/dft-src.cpp | 28 +++++++++++++++-------------
M include/kfr/dft/fft.hpp | 1 +
3 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/include/kfr/data/sincos.hpp b/include/kfr/data/sincos.hpp @@ -31,8 +31,12 @@ namespace kfr namespace data { +template <typename T> +constexpr T c_sin_table[65]; + // data generated by mpfr -constexpr f32 c_sin_table_f32[64] = { +template <> +constexpr f32 c_sin_table<f32>[65] = { /* sin(2*pi* 0/ 256) */ f32(0.0), /* sin(2*pi* 1/ 256) */ f32(0.02454122852291228803173452945928292506547), /* sin(2*pi* 2/ 256) */ f32(0.04906767432741801425495497694268265831475), @@ -96,11 +100,13 @@ constexpr f32 c_sin_table_f32[64] = { /* sin(2*pi* 60/ 256) */ f32(0.9951847266721968862448369531094799215755), /* sin(2*pi* 61/ 256) */ f32(0.9972904566786902161355971401825678211717), /* sin(2*pi* 62/ 256) */ f32(0.9987954562051723927147716047591006944432), - /* sin(2*pi* 63/ 256) */ f32(0.9996988186962042201157656496661721968501) + /* sin(2*pi* 63/ 256) */ f32(0.9996988186962042201157656496661721968501), + /* sin(2*pi* 64/ 256) */ f32(1.0000000000000000000000000000000000000000) }; // data generated by mpfr -constexpr f64 c_sin_table_f64[64] = { +template <> +constexpr f64 c_sin_table<f64>[65] = { /* sin(2*pi* 0/ 256) */ f64(0.0), /* sin(2*pi* 1/ 256) */ f64(0.02454122852291228803173452945928292506547), /* sin(2*pi* 2/ 256) */ f64(0.04906767432741801425495497694268265831475), @@ -164,30 +170,16 @@ constexpr f64 c_sin_table_f64[64] = { /* sin(2*pi* 60/ 256) */ f64(0.9951847266721968862448369531094799215755), /* sin(2*pi* 61/ 256) */ f64(0.9972904566786902161355971401825678211717), /* sin(2*pi* 62/ 256) */ f64(0.9987954562051723927147716047591006944432), - /* sin(2*pi* 63/ 256) */ f64(0.9996988186962042201157656496661721968501) + /* sin(2*pi* 63/ 256) */ f64(0.9996988186962042201157656496661721968501), + /* sin(2*pi* 64/ 256) */ f64(1.0000000000000000000000000000000000000000) }; -} -template <typename T> -constexpr inline T sin_using_table_256(size_t k); +} // namespace data -template <> -constexpr inline f32 sin_using_table_256<f32>(size_t k) -{ - return (k == 0 || k == 128) ? 
0.f - : (k == 64) ? 1.f - : (k > 128) ? -sin_using_table_256<f32>(k - 128) - : (k > 64) ? sin_using_table_256<f32>(128 - k) - : data::c_sin_table_f32[k]; -} -template <> -constexpr inline f64 sin_using_table_256<f64>(size_t k) +template <typename T> +constexpr inline T sin_using_table_256(size_t k) { - return (k == 0 || k == 128) ? 0.0 - : (k == 64) ? 1.0 - : (k > 128) ? -sin_using_table_256<f64>(k - 128) - : (k > 64) ? sin_using_table_256<f64>(128 - k) - : data::c_sin_table_f64[k]; + return (k > 128 ? -1 : +1) * data::c_sin_table<T>[k % 128 >= 64 ? 128 - k % 128 : k % 128]; } template <typename T> @@ -200,4 +192,4 @@ constexpr inline T cos_using_table(size_t size, size_t k) { return sin_using_table<T>(size, k + size / 4); } -} +} // namespace kfr diff --git a/include/kfr/dft/dft-src.cpp b/include/kfr/dft/dft-src.cpp @@ -519,19 +519,21 @@ protected: final_stage(csize<size>, 1, cbool<splitin>, out, in, twiddle); } - // KFR_INTRIN void final_stage(csize_t<32>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*, - // const complex<T>*& twiddle) - // { - // radix4_pass(csize_t<32>(), invN, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(), - // cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle); - // } - // - // KFR_INTRIN void final_stage(csize_t<16>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*, - // const complex<T>*& twiddle) - // { - // radix4_pass(csize_t<16>(), invN, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(), - // cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle); - // } + template <typename U = T, KFR_ENABLE_IF(is_same<U, float>::value)> + KFR_INTRIN void final_stage(csize_t<32>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*, + const complex<T>*& twiddle) + { + radix4_pass(csize_t<32>(), invN, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(), + cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle); + } + + template <typename U = 
T, KFR_ENABLE_IF(is_same<U, float>::value)> + KFR_INTRIN void final_stage(csize_t<16>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*, + const complex<T>*& twiddle) + { + radix4_pass(csize_t<16>(), invN, csize_t<width>(), cfalse, cfalse, cbool_t<use_br2>(), + cbool_t<prefetch>(), cbool_t<inverse>(), cbool_t<aligned>(), out, out, twiddle); + } KFR_INTRIN void final_stage(csize_t<8>, size_t invN, cfalse_t, complex<T>* out, const complex<T>*, const complex<T>*& twiddle) diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -30,6 +30,7 @@ #include "../base/memory.hpp" #include "../base/read_write.hpp" #include "../base/small_buffer.hpp" +#include "../base/univector.hpp" #include "../base/vec.hpp" CMT_PRAGMA_GNU(GCC diagnostic push)