FFT: real-to-complex and complex-to-real transforms - kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit d467e805da09eaf8512f06afe142b2f083f356f8
parent f1417c92b6e925b28c8a8498bf32144784238fdd
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Wed,  7 Sep 2016 12:51:17 +0300

FFT: real-to-complex and complex-to-real transforms

Diffstat:
M include/kfr/dft/fft.hpp  | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 142 insertions(+), 0 deletions(-)
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -1008,6 +1008,148 @@ protected:
         }
     }
 };
+
+enum class dft_pack_format
+{
+    Perm,
+    CCs
+};
+
+template <typename T>
+struct dft_plan_real : dft_plan<T>
+{
+    template <bool direct = true, bool inverse = true>
+    dft_plan_real(size_t size, cbools_t<direct, inverse> type = dft_type::both)
+        : dft_plan<T>(size / 2, type), rtwiddle(size / 4)
+    {
+        using namespace internal;
+        const size_t csize = size / 2;
+        const size_t count = size / 4;
+
+        constexpr size_t width = vector_width<T> * 2;
+
+        block_process(count, csizes<width, 1>, [=](size_t i, auto w) {
+            constexpr size_t width = val_of(decltype(w)());
+            cwrite<width>(rtwiddle.data() + i,
+                          cossin(dup(-c_pi<T> * ((enumerate<T, width>() + i + count) / csize))));
+        });
+    }
+
+    KFR_INTRIN void execute(complex<T>* out, const T* in, u8* temp,
+                            dft_pack_format fmt = dft_pack_format::CCs) const
+    {
+        this->execute_dft(cfalse, out, ptr_cast<complex<T>>(in), temp);
+        to_fmt(out, fmt);
+    }
+    KFR_INTRIN void execute(T* out, const complex<T>* in, u8* temp,
+                            dft_pack_format fmt = dft_pack_format::CCs) const
+    {
+        complex<T>* outdata = ptr_cast<complex<T>>(out);
+        from_fmt(outdata, in, fmt);
+        this->execute_dft(ctrue, outdata, outdata, temp);
+    }
+
+    template <size_t Tag1, size_t Tag2, size_t Tag3>
+    KFR_INTRIN void execute(univector<complex<T>, Tag1>& out, const univector<T, Tag2>& in,
+                            univector<u8, Tag3>& temp, dft_pack_format fmt = dft_pack_format::CCs) const
+    {
+        this->execute_dft(cfalse, out.data(), ptr_cast<complex<T>>(in.data()), temp.data());
+        to_fmt(out.data(), fmt);
+    }
+    template <size_t Tag1, size_t Tag2, size_t Tag3>
+    KFR_INTRIN void execute(univector<T, Tag1>& out, const univector<complex<T>, Tag2>& in,
+                            univector<u8, Tag3>& temp, dft_pack_format fmt = dft_pack_format::CCs) const
+    {
+        complex<T>* outdata = ptr_cast<complex<T>>(out.data());
+        from_fmt(outdata, in.data(), fmt);
+        this->execute_dft(ctrue, outdata, outdata, temp.data());
+    }
+
+private:
+    univector<complex<T>> rtwiddle;
+
+    void to_fmt(complex<T>* out, dft_pack_format fmt) const
+    {
+        using namespace internal;
+        const size_t csize = this->size; // / 2;
+
+        constexpr size_t width = vector_width<T> * 2;
+        const cvec<T, 1> dc = cread<1>(out);
+        const size_t count = csize / 2;
+
+        block_process(count, csizes<width, 1>, [=](size_t i, auto w) {
+            constexpr size_t width   = val_of(decltype(w)());
+            constexpr size_t widthm1 = width - 1;
+            const cvec<T, width> tw   = cread<width>(rtwiddle.data() + i);
+            const cvec<T, width> fpk  = cread<width>(out + i);
+            const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(out + csize - i - widthm1)));
+
+            const cvec<T, width> f1k = fpk + fpnk;
+            const cvec<T, width> f2k = fpk - fpnk;
+            const cvec<T, width> t   = cmul(f2k, tw);
+            cwrite<width>(out + i, T(0.5) * (f1k + t));
+            cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(T(0.5) * (f1k - t))));
+        });
+
+        {
+            size_t k = csize / 2;
+            const cvec<T, 1> fpk  = cread<1>(out + k);
+            const cvec<T, 1> fpnk = negodd(fpk);
+            cwrite<1>(out + k, fpnk);
+        }
+        if (fmt == dft_pack_format::CCs)
+        {
+            cwrite<1>(out, pack(dc[0] + dc[1], 0));
+            cwrite<1>(out + csize, pack(dc[0] - dc[1], 0));
+        }
+        else
+        {
+            cwrite<1>(out, pack(dc[0] + dc[1], dc[0] - dc[1]));
+        }
+    }
+    void from_fmt(complex<T>* out, const complex<T>* in, dft_pack_format fmt) const
+    {
+        using namespace internal;
+
+        const size_t csize = this->size; // / 2;
+
+        cvec<T, 1> dc;
+
+        if (fmt == dft_pack_format::CCs)
+        {
+            dc = pack(in[0].real() + in[csize].real(), in[0].real() - in[csize].real());
+        }
+        else
+        {
+            dc = pack(in[0].real() + in[0].imag(), in[0].real() - in[0].imag());
+        }
+
+        constexpr size_t width = vector_width<T> * 2;
+        const size_t count     = csize / 2;
+
+        block_process(count, csizes<width, 1>, [=](size_t i, auto w) {
+            constexpr size_t width   = val_of(decltype(w)());
+            constexpr size_t widthm1 = width - 1;
+            const cvec<T, width> tw   = cread<width>(rtwiddle.data() + i);
+            const cvec<T, width> fpk  = cread<width>(in + i);
+            const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(in + csize - i - widthm1)));
+
+            const cvec<T, width> f1k = fpk + fpnk;
+            const cvec<T, width> f2k = fpk - fpnk;
+            const cvec<T, width> t   = cmul_conj(f2k, tw);
+            cwrite<width>(out + i, f1k + t);
+            cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(f1k - t)));
+        });
+
+        {
+            size_t k = csize / 2;
+            const cvec<T, 1> fpk  = cread<1>(in + k);
+            const cvec<T, 1> fpnk = 2 * negodd(fpk);
+            cwrite<1>(out + k, fpnk);
+        }
+        cwrite<1>(out, dc);
+    }
+};
 }
 
 #pragma clang diagnostic pop

	kfr Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
	Log \| Files \| Refs \| README