kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit fc7924baf11234af2f46767174728cea7c5c0796
parent 6c5e112c315e3e5021c4285810a82ef459958ac6
Author: Dan Levin <d.levin256@gmail.com>
Date:   Mon, 24 Feb 2020 16:19:56 +0300

Merge pull request #76 from slarew/updates

Fix std::complex compatibility and several features
Diffstat:
Mdocs/docs/convolution.md | 45+++++++++++++++++++++++++++++++++++++++++++++
Mexamples/CMakeLists.txt | 2++
Aexamples/ccv.cpp | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/kfr/base/generators.hpp | 41+++++++++++++++++++++++++++++++++++++++++
Minclude/kfr/dft/convolution.hpp | 38+++++++++++++++++++++++++++++---------
Minclude/kfr/dft/fft.hpp | 6+++---
Minclude/kfr/dft/impl/convolution-impl.cpp | 222++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
Minclude/kfr/dsp/delay.hpp | 94++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
Minclude/kfr/dsp/fir.hpp | 99+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Minclude/kfr/dsp/iir_design.hpp | 22+++++++++++-----------
Ainclude/kfr/dsp/state_holder.hpp | 41+++++++++++++++++++++++++++++++++++++++++
Minclude/kfr/math/complex_math.hpp | 33+++++++++++++++++++++++++++++++++
Minclude/kfr/simd/complex.hpp | 9+++++----
Mtests/base_test.cpp | 12++++++++++++
Mtests/dft_test.cpp | 36+++++++++++++++++++++++++++++++++++-
Mtests/dsp_test.cpp | 46+++++++++++++++++++++++++++++++++++++++++++---
16 files changed, 701 insertions(+), 116 deletions(-)

diff --git a/docs/docs/convolution.md b/docs/docs/convolution.md @@ -68,3 +68,48 @@ reverb_RR.apply(tmp4, audio[1]); audio[0] = tmp1 + tmp2; audio[1] = tmp3 + tmp4; ``` + +# Implementation Details + +The convolution filter efficiently computes the convolution of two signals. +The efficiency is achieved by employing the FFT and the circular convolution +theorem. The algorithm is a variant of the [overlap-add +method](https://en.wikipedia.org/wiki/Overlap%E2%80%93add_method). It works on +a fixed block size \(B\) for arbitrarily long input signals. Thus, the +convolution of a streaming input signal with a long FIR filter \(h[n]\) (where +the length of \(h[n]\) may exceed the block size \(B\)) is computed with a +fixed complexity \(O(B \log B)\). + +More formally, the convolution filter computes \(y[n] = (x * h)[n]\) by +partitioning the input \(x\) and filter \(h\) into blocks and applies the +overlap-add method. Let \(x[n]\) be an input signal of arbitrary length. Often, +\(x[n]\) is a streaming input with unknown length. Let \(h[n]\) be an FIR +filter with \(M\) taps. The convolution filter works on a fixed block size +\(B=2^b\). + +First, the input and filter are windowed and shifted to the origin to give the +\(k\)-th block input \(x_k[n] = x[n + kB] , n=\{0,1,\ldots,B-1\},\forall +k\in\mathbb{Z}\) and \(j\)-th block filter \(h_j[n] = h[n + jB] , +n=\{0,1,\ldots,B-1\},j=\{0,1,\ldots,\lfloor M/B \rfloor\}\). The convolution +\(y_{k,j}[n] = (x_k * h_j)[n]\) is efficiently computed with length \(2B\) FFTs +as +\[ +y_{k,j}[n] = \mathrm{IFFT}(\mathrm{FFT}(x_k[n])\cdot\mathrm{FFT}(h_j[n])) +. +\] + +The overlap-add method sums the "overlap" from the previous block with the current block. +To complete the \(k\)-th block, the contribution of all blocks of the filter +are summed together to give +\[ y_{k}[n] = \sum_j y_{k-j,j}[n] . \] +The final convolution is then the sum of the shifted blocks +\[ y[n] = \sum_k y_{k}[n - kB] . \] +Note that \(y_k[n]\) is of length \(2B\) so its second half overlaps and adds +into the first half of the \(y_{k+1}[n]\) block. + +## Maximum efficiency criterion + +To avoid excess computation or maximize throughput, the convolution filter +should be given input samples in multiples of the block size \(B\). Otherwise, +the FFT of a block is computed twice as many times as would be necessary and +hence throughput is reduced. diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt @@ -50,4 +50,6 @@ if (ENABLE_DFT) target_link_libraries(dft kfr_multidft) target_compile_definitions(dft PRIVATE -DKFR_DFT_MULTI=1) endif () + add_executable(ccv ccv.cpp) + target_link_libraries(ccv kfr kfr_dft use_arch) endif () diff --git a/examples/ccv.cpp b/examples/ccv.cpp @@ -0,0 +1,71 @@ +/* + * ccv, part of KFR (https://www.kfr.dev) + * Copyright (C) 2019 D Levin + * See LICENSE.txt for details + */ + +// Complex convolution filter examples + +#define CMT_BASETYPE_F32 + +#include <chrono> +#include <kfr/base.hpp> +#include <kfr/dft.hpp> +#include <kfr/dsp.hpp> + +using namespace kfr; + +int main() +{ + println(library_version()); + + // low-pass filter + univector<fbase, 1023> taps127; + expression_pointer<fbase> kaiser = to_pointer(window_kaiser(taps127.size(), 3.0)); + fir_lowpass(taps127, 0.2, kaiser, true); + + // Create filters. + size_t const block_size = 256; + convolve_filter<complex<fbase>> conv_filter_complex(univector<complex<fbase>>(make_complex(taps127, zeros())), + block_size); + convolve_filter<fbase> conv_filter_real(taps127, block_size); + + // Create noise to filter. + auto const size = 1024 * 100 + 33; // not a multiple of block_size + univector<complex<fbase>> cnoise = + make_complex(truncate(gen_random_range(random_bit_generator{ 1, 2, 3, 4 }, -1.f, +1.f), size), + truncate(gen_random_range(random_bit_generator{ 3, 4, 9, 8 }, -1.f, +1.f), size)); + univector<fbase> noise = + truncate(gen_random_range(random_bit_generator{ 3, 4, 9, 8 }, -1.f, +1.f), size); + + // Filter results. + univector<complex<fbase>> filtered_cnoise_ccv(size), filtered_cnoise_fir(size); + univector<fbase> filtered_noise_ccv(size), filtered_noise_fir(size); + + // Complex filtering (time and compare). + auto tic = std::chrono::high_resolution_clock::now(); + conv_filter_complex.apply(filtered_cnoise_ccv, cnoise); + auto toc = std::chrono::high_resolution_clock::now(); + auto const ccv_time_complex = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); + tic = toc; + filtered_cnoise_fir = kfr::fir(cnoise, taps127); + toc = std::chrono::high_resolution_clock::now(); + auto const fir_time_complex = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); + auto const cdiff = rms(cabs(filtered_cnoise_fir - filtered_cnoise_ccv)); + + // Real filtering (time and compare). + tic = std::chrono::high_resolution_clock::now(); + conv_filter_real.apply(filtered_noise_ccv, noise); + toc = std::chrono::high_resolution_clock::now(); + auto const ccv_time_real = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); + tic = toc; + filtered_noise_fir = kfr::fir(noise, taps127); + toc = std::chrono::high_resolution_clock::now(); + auto const fir_time_real = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); + auto const diff = rms(filtered_noise_fir - filtered_noise_ccv); + + println("complex: convolution_filter ", ccv_time_complex.count(), " fir ", fir_time_complex.count(), " diff=", cdiff); + println("real: convolution_filter ", ccv_time_real.count(), " fir ", fir_time_real.count(), " diff=", diff); + + return 0; +} diff --git a/include/kfr/base/generators.hpp b/include/kfr/base/generators.hpp @@ -136,6 +136,35 @@ protected: T vstep; }; +template <typename T, size_t width = vector_width<T>* bitness_const(1, 2), + std::enable_if_t<std::is_same<complex<deep_subtype<T>>, T>::value, int> = 0> +struct generator_expj : generator<T, width, generator_expj<T, width>> +{ + using ST = deep_subtype<T>; + + generator_expj(ST start_, ST step_) + : step(step_), alpha(2 * sqr(sin(width * step / 2))), beta(-sin(width * step)) + { + this->resync(T(start_)); + } + KFR_MEM_INTRINSIC void sync(T start) const CMT_NOEXCEPT { this->value = init_cossin(step, start.real()); } + + KFR_MEM_INTRINSIC void next() const CMT_NOEXCEPT + { + this->value = ccomp(cdecom(this->value) - + subadd(alpha * cdecom(this->value), beta * swap<2>(cdecom(this->value)))); + } + +protected: + ST const step; + ST const alpha; + ST const beta; + CMT_NOINLINE static vec<T, width> init_cossin(ST w, ST phase) + { + return ccomp(cossin(dup(phase + enumerate<ST, width>() * w))); + } +}; + template <typename T, size_t width = vector_width<T>* bitness_const(1, 2)> struct generator_exp2 : generator<T, width, generator_exp2<T, width>> { @@ -255,6 +284,18 @@ KFR_FUNCTION internal::generator_exp<TF> gen_exp(T1 start, T2 step) /** * @brief Returns template expression that generates values using the following formula: * \f[ + x_i = e^{ j ( start + i \cdot step ) } + \f] + */ +template <typename T1, typename T2, typename TF = complex<ftype<common_type<T1, T2>>>> +KFR_FUNCTION internal::generator_expj<TF> gen_expj(T1 start, T2 step) +{ + return internal::generator_expj<TF>(start, step); +} + +/** + * @brief Returns template expression that generates values using the following formula: + * \f[ x_i = 2^{ start + i \cdot step } \f] */ diff --git a/include/kfr/dft/convolution.hpp b/include/kfr/dft/convolution.hpp @@ -84,6 +84,9 @@ public: explicit convolve_filter(size_t size, size_t block_size = 1024); explicit convolve_filter(const univector_ref<const T>& data, size_t block_size = 1024); void set_data(const univector_ref<const T>& data); + void reset() final; + /// Apply filter to multiples of returned block size for optimal processing efficiency. + size_t input_block_size() const { return block_size; } protected: void process_expression(T* dest, const expression_pointer<T>& src, size_t size) final @@ -93,19 +96,36 @@ protected: } void process_buffer(T* output, const T* input, size_t size) final; - const size_t size; + using ST = subtype<T>; + static constexpr auto real_fft = !std::is_same<T, complex<ST>>::value; + using plan_t = std::conditional_t<real_fft, dft_plan_real<T>, dft_plan<ST>>; + + // Length of filter data. + size_t data_size; + // Size of block to process. const size_t block_size; - const dft_plan_real<T> fft; + // FFT plan for circular convolution. + const plan_t fft; + // Temp storage for FFT. univector<u8> temp; - std::vector<univector<complex<T>>> segments; - std::vector<univector<complex<T>>> ir_segments; - size_t input_position; + // History of input segments after fwd DFT. History is circular relative to position below. + std::vector<univector<complex<ST>>> segments; + // Index into segments of current block. + size_t position; + // Blocks of filter/data after fwd DFT. + std::vector<univector<complex<ST>>> ir_segments; + // Saved input for current block. univector<T> saved_input; - univector<complex<T>> premul; - univector<complex<T>> cscratch; - univector<T> scratch; + // Index into saved_input for next input to begin. + size_t input_position; + // Pre-multiplied products of input history and delayed filter blocks. + univector<complex<ST>> premul; + // Scratch buffer for product of filter and input for processing by reverse DFT. + univector<complex<ST>> cscratch; + // Scratch buffers for input and output of fwd and rev DFTs. + univector<T> scratch1, scratch2; + // Overlap saved from previous block to add into current block. univector<T> overlap; - size_t position; }; } // namespace CMT_ARCH_NAME diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -430,12 +430,12 @@ struct dct_plan : dft_plan<T> dft_plan<T>::execute(mirrored_dft.data(), mirrored.data(), temp, cfalse); for (size_t i = 0; i < halfSize; i++) { - out[i * 2 + 0] = mirrored_dft[i].re; - out[i * 2 + 1] = mirrored_dft[size - 1 - i].re; + out[i * 2 + 0] = mirrored_dft[i].real(); + out[i * 2 + 1] = mirrored_dft[size - 1 - i].real(); } if (size % 2) { - out[size - 1] = mirrored_dft[halfSize].re; + out[size - 1] = mirrored_dft[halfSize].real(); } } } diff --git a/include/kfr/dft/impl/convolution-impl.cpp b/include/kfr/dft/impl/convolution-impl.cpp @@ -36,37 +36,39 @@ namespace intrinsics template <typename T> univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2) { - const size_t size = next_poweroftwo(src1.size() + src2.size() - 1); - univector<complex<T>> src1padded = src1; - univector<complex<T>> src2padded = src2; - src1padded.resize(size, 0); - src2padded.resize(size, 0); - - dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), size); + using ST = subtype<T>; + const size_t size = next_poweroftwo(src1.size() + src2.size() - 1); + univector<complex<ST>> src1padded = src1; + univector<complex<ST>> src2padded = src2; + src1padded.resize(size); + src2padded.resize(size); + + dft_plan_ptr<ST> dft = dft_cache::instance().get(ctype_t<ST>(), size); univector<u8> temp(dft->temp_size); dft->execute(src1padded, src1padded, temp); dft->execute(src2padded, src2padded, temp); src1padded = src1padded * src2padded; dft->execute(src1padded, src1padded, temp, true); - const T invsize = reciprocal<T>(size); + const ST invsize = reciprocal<ST>(size); return truncate(real(src1padded), src1.size() + src2.size() - 1) * invsize; } template <typename T> univector<T> correlate(const univector_ref<const T>& src1, const univector_ref<const T>& src2) { - const size_t size = next_poweroftwo(src1.size() + src2.size() - 1); - univector<complex<T>> src1padded = src1; - univector<complex<T>> src2padded = reverse(src2); - src1padded.resize(size, 0); - src2padded.resize(size, 0); - dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), size); + using ST = subtype<T>; + const size_t size = next_poweroftwo(src1.size() + src2.size() - 1); + univector<complex<ST>> src1padded = src1; + univector<complex<ST>> src2padded = reverse(src2); + src1padded.resize(size); + src2padded.resize(size); + dft_plan_ptr<ST> dft = dft_cache::instance().get(ctype_t<ST>(), size); univector<u8> temp(dft->temp_size); dft->execute(src1padded, src1padded, temp); dft->execute(src2padded, src2padded, temp); src1padded = src1padded * src2padded; dft->execute(src1padded, src1padded, temp, true); - const T invsize = reciprocal<T>(size); + const ST invsize = reciprocal<ST>(size); return truncate(real(src1padded), src1.size() + src2.size() - 1) * invsize; } @@ -80,21 +82,55 @@ univector<T> autocorrelate(const univector_ref<const T>& src1) } // namespace intrinsics +// Create a helper template struct to handle the differences between real and complex FFT. +template <typename T, typename ST = subtype<T>, + typename plan_t = + std::conditional_t<std::is_same<T, complex<ST>>::value, dft_plan<ST>, dft_plan_real<T>>> +struct convolve_filter_fft +{ + static plan_t make(size_t size); + static inline void ifft(plan_t const& plan, univector<T>& out, const univector<complex<T>>& in, + univector<u8>& temp); + static size_t csize(plan_t const& plan); +}; +// Partial template specializations for complex and real cases: +template <typename ST> +struct convolve_filter_fft<complex<ST>, ST, dft_plan<ST>> +{ + static dft_plan<ST> make(size_t size) { return dft_plan<ST>(size); } + static inline void ifft(dft_plan<ST> const& plan, univector<complex<ST>>& out, + const univector<complex<ST>>& in, univector<u8>& temp) + { + plan.execute(out, in, temp, ctrue); + } + static size_t csize(dft_plan<ST> const& plan) { return plan.size; } +}; template <typename T> -convolve_filter<T>::convolve_filter(size_t size, size_t block_size) - : size(size), block_size(block_size), fft(2 * next_poweroftwo(block_size), dft_pack_format::Perm), - temp(fft.temp_size), segments((size + block_size - 1) / block_size) +struct convolve_filter_fft<T, T, dft_plan_real<T>> +{ + static dft_plan_real<T> make(size_t size) { return dft_plan_real<T>(size, dft_pack_format::Perm); } + static inline void ifft(dft_plan_real<T> const& plan, univector<T>& out, const univector<complex<T>>& in, + univector<u8>& temp) + { + plan.execute(out, in, temp); + } + static size_t csize(dft_plan_real<T> const& plan) { return plan.size / 2; } +}; +template <typename T> +convolve_filter<T>::convolve_filter(size_t size_, size_t block_size_) + : data_size(size_), block_size(next_poweroftwo(block_size_)), + fft(convolve_filter_fft<T>::make(2 * block_size)), temp(fft.temp_size), + segments((data_size + block_size - 1) / block_size), ir_segments(segments.size()), input_position(0), + saved_input(block_size), premul(convolve_filter_fft<T>::csize(fft)), + cscratch(convolve_filter_fft<T>::csize(fft)), scratch1(fft.size), scratch2(fft.size), + overlap(block_size), position(0) { } template <typename T> -convolve_filter<T>::convolve_filter(const univector_ref<const T>& data, size_t block_size) - : size(data.size()), block_size(next_poweroftwo(block_size)), - fft(2 * next_poweroftwo(block_size), dft_pack_format::Perm), temp(fft.temp_size), - segments((data.size() + next_poweroftwo(block_size) - 1) / next_poweroftwo(block_size)), - ir_segments((data.size() + next_poweroftwo(block_size) - 1) / next_poweroftwo(block_size)), - input_position(0), position(0) +convolve_filter<T>::convolve_filter(const univector_ref<const T>& data, size_t block_size_) + : convolve_filter(data.size(), block_size_) { set_data(data); } @@ -102,65 +138,125 @@ convolve_filter<T>::convolve_filter(const univector_ref<const T>& data, size_t b template <typename T> void convolve_filter<T>::set_data(const univector_ref<const T>& data) { + data_size = data.size(); + segments.resize((data_size + block_size - 1) / block_size); + ir_segments.resize(segments.size()); univector<T> input(fft.size); - const T ifftsize = reciprocal(T(fft.size)); + const ST ifftsize = reciprocal(ST(fft.size)); for (size_t i = 0; i < ir_segments.size(); i++) { - segments[i].resize(block_size); - ir_segments[i].resize(block_size, 0); + segments[i].resize(convolve_filter_fft<T>::csize(fft)); + ir_segments[i].resize(convolve_filter_fft<T>::csize(fft)); input = padded(data.slice(i * block_size, block_size)); fft.execute(ir_segments[i], input, temp); process(ir_segments[i], ir_segments[i] * ifftsize); } - saved_input.resize(block_size, 0); - scratch.resize(block_size * 2); - premul.resize(block_size, 0); - cscratch.resize(block_size); - overlap.resize(block_size, 0); + reset(); } template <typename T> void convolve_filter<T>::process_buffer(T* output, const T* input, size_t size) { + // Note that the conditionals in the following algorithm are meant to + // reduce complexity in the common cases of either processing complete + // blocks (processing == block_size) or only one segment. + + // For complex filtering, use CCs pack format to omit special processing in fft_multiply[_accumulate]. + static constexpr auto fft_multiply_pack = real_fft ? dft_pack_format::Perm : dft_pack_format::CCs; + size_t processed = 0; while (processed < size) { - const size_t processing = std::min(size - processed, block_size - input_position); - builtin_memcpy(saved_input.data() + input_position, input + processed, processing * sizeof(T)); + // Calculate how many samples to process this iteration. + auto const processing = std::min(size - processed, block_size - input_position); + + // Prepare input to forward FFT: + if (processing == block_size) + { + // No need to work with saved_input. + builtin_memcpy(scratch1.data(), input + processed, processing * sizeof(T)); + } + else + { + // Append this iteration's input to the saved_input current block. + builtin_memcpy(saved_input.data() + input_position, input + processed, processing * sizeof(T)); + builtin_memcpy(scratch1.data(), saved_input.data(), block_size * sizeof(T)); + } - process(scratch, padded(saved_input)); - fft.execute(segments[position], scratch, temp); + // Forward FFT saved_input block. + fft.execute(segments[position], scratch1, temp); - if (input_position == 0) + if (segments.size() == 1) { - process(premul, zeros()); - for (size_t i = 1; i < segments.size(); i++) + // Just one segment/block of history. + // Y_k = H * X_k + fft_multiply(cscratch, ir_segments[0], segments[0], fft_multiply_pack); + } + else + { + // More than one segment/block of history so this is more involved. + if (input_position == 0) { - const size_t n = (position + i) % segments.size(); - fft_multiply_accumulate(premul, ir_segments[i], segments[n], dft_pack_format::Perm); + // At the start of an input block, we premultiply the history from + // previous input blocks with the extended filter blocks. + + // Y_(k-i,i) = H_i * X_(k-i) + // premul += Y_(k-i,i) for i=1,...,N + + fft_multiply(premul, ir_segments[1], segments[(position + 1) % segments.size()], + fft_multiply_pack); + for (size_t i = 2; i < segments.size(); i++) + { + const size_t n = (position + i) % segments.size(); + fft_multiply_accumulate(premul, ir_segments[i], segments[n], fft_multiply_pack); + } } + // Y_(k,0) = H_0 * X_k + // Y_k = premul + Y_(k,0) + fft_multiply_accumulate(cscratch, premul, ir_segments[0], segments[position], fft_multiply_pack); } - fft_multiply_accumulate(cscratch, premul, ir_segments[0], segments[position], dft_pack_format::Perm); - - fft.execute(scratch, cscratch, temp); + // y_k = IFFT( Y_k ) + convolve_filter_fft<T>::ifft(fft, scratch2, cscratch, temp); + // z_k = y_k + overlap process(make_univector(output + processed, processing), - scratch.slice(input_position) + overlap.slice(input_position)); + scratch2.slice(input_position) + overlap.slice(input_position)); input_position += processing; + processed += processing; + + // If a whole block was processed, prepare for next block. if (input_position == block_size) { + // Input block k is complete. Move to (k+1)-th input block. input_position = 0; - process(saved_input, zeros()); - builtin_memcpy(overlap.data(), scratch.data() + block_size, block_size * sizeof(T)); + // Zero out the saved_input if it will be used in the next iteration. + auto const remaining = size - processed; + if (remaining < block_size && remaining > 0) + { + process(saved_input, zeros()); + } + + builtin_memcpy(overlap.data(), scratch2.data() + block_size, block_size * sizeof(T)); position = position > 0 ? position - 1 : segments.size() - 1; } + } +} - processed += processing; +template <typename T> +void convolve_filter<T>::reset() +{ + for (auto& segment : segments) + { + process(segment, zeros()); } + position = 0; + process(saved_input, zeros()); + input_position = 0; + process(overlap, zeros()); } namespace intrinsics @@ -168,40 +264,68 @@ namespace intrinsics template univector<float> convolve<float>(const univector_ref<const float>&, const univector_ref<const float>&); +template univector<complex<float>> convolve<complex<float>>(const univector_ref<const complex<float>>&, + const univector_ref<const complex<float>>&); template univector<float> correlate<float>(const univector_ref<const float>&, const univector_ref<const float>&); +template univector<complex<float>> correlate<complex<float>>(const univector_ref<const complex<float>>&, + const univector_ref<const complex<float>>&); template univector<float> autocorrelate<float>(const univector_ref<const float>&); +template univector<complex<float>> autocorrelate<complex<float>>(const univector_ref<const complex<float>>&); } // namespace intrinsics template convolve_filter<float>::convolve_filter(size_t, size_t); +template convolve_filter<complex<float>>::convolve_filter(size_t, size_t); template convolve_filter<float>::convolve_filter(const univector_ref<const float>&, size_t); +template convolve_filter<complex<float>>::convolve_filter(const univector_ref<const complex<float>>&, size_t); template void convolve_filter<float>::set_data(const univector_ref<const float>&); +template void convolve_filter<complex<float>>::set_data(const univector_ref<const complex<float>>&); template void convolve_filter<float>::process_buffer(float* output, const float* input, size_t size); +template void convolve_filter<complex<float>>::process_buffer(complex<float>* output, + const complex<float>* input, size_t size); + +template void convolve_filter<float>::reset(); +template void convolve_filter<complex<float>>::reset(); namespace intrinsics { template univector<double> convolve<double>(const univector_ref<const double>&, const univector_ref<const double>&); +template univector<complex<double>> convolve<complex<double>>(const univector_ref<const complex<double>>&, + const univector_ref<const complex<double>>&); template univector<double> correlate<double>(const univector_ref<const double>&, const univector_ref<const double>&); +template univector<complex<double>> correlate<complex<double>>(const univector_ref<const complex<double>>&, + const univector_ref<const complex<double>>&); template univector<double> autocorrelate<double>(const univector_ref<const double>&); +template univector<complex<double>> autocorrelate<complex<double>>( + const univector_ref<const complex<double>>&); } // namespace intrinsics template convolve_filter<double>::convolve_filter(size_t, size_t); +template convolve_filter<complex<double>>::convolve_filter(size_t, size_t); template convolve_filter<double>::convolve_filter(const univector_ref<const double>&, size_t); +template convolve_filter<complex<double>>::convolve_filter(const univector_ref<const complex<double>>&, + size_t); template void convolve_filter<double>::set_data(const univector_ref<const double>&); +template void convolve_filter<complex<double>>::set_data(const univector_ref<const complex<double>>&); template void convolve_filter<double>::process_buffer(double* output, const double* input, size_t size); +template void convolve_filter<complex<double>>::process_buffer(complex<double>* output, + const complex<double>* input, size_t size); + +template void convolve_filter<double>::reset(); +template void convolve_filter<complex<double>>::reset(); template <typename T> filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block_size) @@ -210,7 +334,9 @@ filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block } template filter<float>* make_convolve_filter(const univector_ref<const float>&, size_t); +template filter<complex<float>>* make_convolve_filter(const univector_ref<const complex<float>>&, size_t); template filter<double>* make_convolve_filter(const univector_ref<const double>&, size_t); +template filter<complex<double>>* make_convolve_filter(const univector_ref<const complex<double>>&, size_t); } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/dsp/delay.hpp b/include/kfr/dsp/delay.hpp @@ -25,42 +25,73 @@ */ #pragma once +#include "../base/basic_expressions.hpp" #include "../base/expression.hpp" #include "../base/univector.hpp" +#include "state_holder.hpp" namespace kfr { inline namespace CMT_ARCH_NAME { +template <typename T, size_t samples, univector_tag Tag = samples> +struct delay_state +{ + template <size_t S2 = samples, KFR_ENABLE_IF(S2 == Tag)> + delay_state() : data({ 0 }), cursor(0) + { + } + + template <size_t S2 = samples, KFR_ENABLE_IF(S2 != Tag)> + delay_state() : data(samples), cursor(0) + { + } + + mutable univector<T, Tag> data; + mutable size_t cursor; +}; + +template <typename T> +struct delay_state<T, 1, 1> +{ + mutable T data = T(0); +}; + namespace internal { -template <size_t delay, typename E> + +template <size_t delay, typename E, bool stateless, univector_tag STag> struct expression_delay : expression_with_arguments<E> { using value_type = value_type_of<E>; using T = value_type; using expression_with_arguments<E>::expression_with_arguments; + expression_delay(E&& e, const delay_state<T, delay, STag>& state) + : expression_with_arguments<E>(std::forward<E>(e)), state(state) + { + } + template <size_t N, KFR_ENABLE_IF(N <= delay)> friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, cinput_t cinput, size_t index, vec_shape<T, N>) { vec<T, N> out; - size_t c = self.cursor; - self.data.ringbuf_read(c, out); + size_t c = self.state.s.cursor; + self.state.s.data.ringbuf_read(c, out); const vec<T, N> in = self.argument_first(cinput, index, vec_shape<T, N>()); - self.data.ringbuf_write(self.cursor, in); + self.state.s.data.ringbuf_write(self.state.s.cursor, in); return out; } friend vec<T, 1> get_elements(const expression_delay& self, cinput_t cinput, size_t index, vec_shape<T, 1>) { T out; - size_t c = self.cursor; - self.data.ringbuf_read(c, out); + size_t c = self.state.s.cursor; + self.state.s.data.ringbuf_read(c, out); const T in = self.argument_first(cinput, index, vec_shape<T, 1>())[0]; - self.data.ringbuf_write(self.cursor, in); + self.state.s.data.ringbuf_write(self.state.s.cursor, in); return out; } template <size_t N, KFR_ENABLE_IF(N > delay)> @@ -68,34 +99,38 @@ struct expression_delay : expression_with_arguments<E> vec_shape<T, N>) { vec<T, delay> out; - size_t c = self.cursor; - self.data.ringbuf_read(c, out); + size_t c = self.state.s.cursor; + self.state.s.data.ringbuf_read(c, out); const vec<T, N> in = self.argument_first(cinput, index, vec_shape<T, N>()); - self.data.ringbuf_write(self.cursor, slice<N - delay, delay>(in)); + self.state.s.data.ringbuf_write(self.state.s.cursor, slice<N - delay, delay>(in)); return concat_and_slice<0, N>(out, in); } - mutable univector<value_type, delay> data = scalar(value_type(0)); - mutable size_t cursor = 0; + state_holder<delay_state<T, delay, STag>, stateless> state; }; -template <typename E> -struct expression_delay<1, E> : expression_with_arguments<E> +template <typename E, bool stateless, univector_tag STag> +struct expression_delay<1, E, stateless, STag> : expression_with_arguments<E> { using value_type = value_type_of<E>; using T = value_type; using expression_with_arguments<E>::expression_with_arguments; + expression_delay(E&& e, const delay_state<T, 1, STag>& state) + : expression_with_arguments<E>(std::forward<E>(e)), state(state) + { + } + template <size_t N> friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, cinput_t cinput, size_t index, vec_shape<T, N>) { const vec<T, N> in = self.argument_first(cinput, index, vec_shape<T, N>()); - const vec<T, N> out = insertleft(self.data, in); - self.data = in[N - 1]; + const vec<T, N> out = insertleft(self.state.s.data, in); + self.state.s.data = in[N - 1]; return out; } - mutable value_type data = value_type(0); + state_holder<delay_state<T, 1, STag>, stateless> state; }; } // namespace internal @@ -108,11 +143,30 @@ struct expression_delay<1, E> : expression_with_arguments<E> * auto d = delay(v, csize<4>); * @endcode */ -template <size_t samples = 1, typename E1> -KFR_INTRINSIC internal::expression_delay<samples, E1> delay(E1&& e1, csize_t<samples> = csize_t<samples>()) +template <size_t samples = 1, typename E1, typename T = value_type_of<E1>> +KFR_INTRINSIC internal::expression_delay<samples, E1, false, samples> delay(E1&& e1) { static_assert(samples >= 1 && samples < 1024, ""); - return internal::expression_delay<samples, E1>(std::forward<E1>(e1)); + return internal::expression_delay<samples, E1, false, samples>(std::forward<E1>(e1), + delay_state<T, samples>()); +} + +/** + * @brief Returns template expression that applies delay to the input (uses ring buffer in state) + * @param state delay filter state + * @param e1 an input expression + * @code + * univector<double, 10> v = counter(); + * delay_state<double, 4> state; + * auto d = delay(state, v); + * @endcode + */ +template <size_t samples, typename T, typename E1, univector_tag STag> +KFR_INTRINSIC internal::expression_delay<samples, E1, true, STag> delay(delay_state<T, samples, STag>& state, + E1&& e1) +{ + static_assert(STag == tag_dynamic_vector || (samples >= 1 && samples < 1024), ""); + return internal::expression_delay<samples, E1, true, STag>(std::forward<E1>(e1), state); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp @@ -31,6 +31,7 @@ #include "../base/reduce.hpp" #include "../base/univector.hpp" #include "../simd/vec.hpp" +#include "state_holder.hpp" namespace kfr { @@ -70,28 +71,23 @@ struct fir_state mutable size_t delayline_cursor; }; -namespace internal +template <typename U, univector_tag Tag = tag_dynamic_vector> +struct moving_sum_state { - -template <typename T, bool stateless> -struct state_holder + moving_sum_state() : delayline({ 0 }), head_cursor(0), tail_cursor(1) {} + mutable univector<U, Tag> delayline; + mutable size_t head_cursor, tail_cursor; +}; +template <typename U> +struct moving_sum_state<U, tag_dynamic_vector> { - state_holder() = delete; - state_holder(const state_holder&) = default; - state_holder(state_holder&&) = default; - constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {} - T s; + moving_sum_state(size_t sum_length) : delayline(sum_length, U(0)), head_cursor(0), tail_cursor(1) {} + mutable univector<U> delayline; + mutable size_t head_cursor, tail_cursor; }; -template <typename T> -struct state_holder<T, true> +namespace internal { - state_holder() = delete; - state_holder(const state_holder&) = default; - state_holder(state_holder&&) = default; - constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {} - const T& s; -}; template <size_t tapcount, typename T, typename U, typename E1, bool stateless = false> struct expression_short_fir : expression_with_arguments<E1> @@ -153,6 +149,51 @@ struct expression_fir : expression_with_arguments<E1> } state_holder<fir_state<T, U>, stateless> state; }; + +template <typename U, typename E1, univector_tag STag, bool stateless = false> +struct expression_moving_sum : expression_with_arguments<E1> +{ + using value_type = U; + + expression_moving_sum(E1&& e1, const moving_sum_state<U, STag>& state) + : expression_with_arguments<E1>(std::forward<E1>(e1)), state(state) + { + } + + template <size_t N> + KFR_INTRINSIC friend vec<U, N> get_elements(const expression_moving_sum& self, cinput_t cinput, + size_t index, vec_shape<U, N> x) + { + static_assert(N >= 1, ""); + + const vec<U, N> input = self.argument_first(cinput, index, x); + + vec<U, N> output; + size_t wcursor = self.state.s.head_cursor; + size_t rcursor = self.state.s.tail_cursor; + + // initial summation + self.state.s.delayline.ringbuf_write(wcursor, input[0]); + auto s = sum(self.state.s.delayline); + output[0] = s; + + CMT_LOOP_NOUNROLL + for (size_t i = 1; i < N; i++) + { + U nextout; + self.state.s.delayline.ringbuf_read(rcursor, nextout); + U const nextin = input[i]; + self.state.s.delayline.ringbuf_write(wcursor, nextin); + s += nextin - nextout; + output[i] = s; + } + self.state.s.delayline.ringbuf_step(rcursor, 1); + self.state.s.head_cursor = wcursor; + self.state.s.tail_cursor = rcursor; + return output; + } + state_holder<moving_sum_state<U, STag>, stateless> state; +}; } // namespace internal /** @@ -178,6 +219,30 @@ KFR_INTRINSIC internal::expression_fir<T, U, E1, true> fir(fir_state<T, U>& stat } /** + * @brief Returns template expression that performs moving sum on the input + * @param state moving sum state + * @param e1 an input expression + */ +template <size_t sum_length, typename E1> +KFR_INTRINSIC internal::expression_moving_sum<value_type_of<E1>, E1, tag_dynamic_vector> moving_sum(E1&& e1) +{ + return internal::expression_moving_sum<value_type_of<E1>, E1, tag_dynamic_vector>(std::forward<E1>(e1), + sum_length); +} + +/** + * @brief Returns template expression that performs moving sum on the input + * @param state moving sum state + * @param e1 an input expression + */ +template <typename U, typename E1, univector_tag STag> +KFR_INTRINSIC internal::expression_moving_sum<U, E1, STag, true> moving_sum(moving_sum_state<U, STag>& state, + E1&& e1) +{ + return internal::expression_moving_sum<U, E1, STag, true>(std::forward<E1>(e1), state); +} + +/** * @brief Returns template expression that applies FIR filter to the input (count of coefficients must be in * range 2..32) * @param e1 an input expression diff --git a/include/kfr/dsp/iir_design.hpp b/include/kfr/dsp/iir_design.hpp @@ -63,7 +63,7 @@ KFR_FUNCTION zpk<T> chebyshev1(int N, identity<T> rp) univector<T> theta = c_pi<T> * m / (2 * N); univector<complex<T>> p = -csinh(make_complex(mu, theta)); - T k = product(-p).re; + T k = product(-p).real(); if (N % 2 == 0) k = k / sqrt(1.0 + eps * eps); return { {}, std::move(p), k }; @@ -98,7 +98,7 @@ KFR_FUNCTION zpk<T> chebyshev2(int N, identity<T> rs) p = make_complex(sinh(mu) * real(p), cosh(mu) * imag(p)); p = 1.0 / p; - T k = (product(-p) / product(-z)).re; + T k = (product(-p) / product(-z)).real(); return { std::move(z), std::move(p), k }; } @@ -868,7 +868,7 @@ KFR_FUNCTION zpk<T> bilinear(const zpk<T>& filter, identity<T> fs) result.z = (fs2 + filter.z) / (fs2 - filter.z); result.p = (fs2 + filter.p) / (fs2 - filter.p); result.z.resize(result.p.size(), complex<T>(-1)); - result.k = filter.k * real(product(fs2 - filter.z) / product(fs2 - filter.p)); + result.k = filter.k * kfr::real(product(fs2 - filter.z) / product(fs2 - filter.p)); return result; } @@ -881,7 +881,7 @@ struct zero_pole_pairs template <typename T> KFR_FUNCTION vec<T, 3> zpk2tf_poly(const complex<T>& x, const complex<T>& y) { - return { T(1), -(x.re + y.re), x.re * y.re - x.im * y.im }; + return { T(1), -(x.real() + y.real()), x.real() * y.real() - x.imag() * y.imag() }; } template <typename T> @@ -897,18 +897,18 @@ template <typename T> KFR_FUNCTION univector<complex<T>> cplxreal(const univector<complex<T>>& list) { univector<complex<T>> x = list; - std::sort(x.begin(), x.end(), [](const complex<T>& a, const complex<T>& b) { return a.re < b.re; }); + std::sort(x.begin(), x.end(), [](const complex<T>& a, const complex<T>& b) { return a.real() < b.real(); }); T tol = std::numeric_limits<T>::epsilon() * 100; univector<complex<T>> result = x; for (size_t i = result.size(); i > 1; i--) { if (!isreal(result[i - 1]) && !isreal(result[i - 2])) { - if (abs(result[i - 1].re - result[i - 2].re) < tol && - abs(result[i - 1].im + result[i - 2].im) < tol) + if (abs(result[i - 1].real() - result[i - 2].real()) < tol && + abs(result[i - 1].imag() + result[i - 2].imag()) < tol) { result.erase(result.begin() + i - 1); - result[i - 2].im = abs(result[i - 2].im); + result[i - 2].imag(abs(result[i - 2].imag())); } } } @@ -951,7 +951,7 @@ KFR_FUNCTION int countreal(const univector<complex<T>>& list) int nreal = 0; for (complex<T> c : list) { - if (c.im == 0) + if (c.imag() == 0) nreal++; } return nreal; @@ -974,7 +974,7 @@ KFR_FUNCTION zpk<T> lp2hp_zpk(const zpk<T>& filter, identity<T> wo) result.z = wo / filter.z; result.p = wo / filter.p; result.z.resize(result.p.size(), T(0)); - result.k = filter.k * real(product(-filter.z) / product(-filter.p)); + result.k = filter.k * kfr::real(product(-filter.z) / product(-filter.p)); return result; } @@ -1012,7 +1012,7 @@ KFR_FUNCTION zpk<T> lp2bs_zpk(const zpk<T>& filter, identity<T> wo, identity<T> result.z.resize(result.z.size() + filter.p.size() - filter.z.size(), complex<T>(0, +wo)); result.z.resize(result.z.size() + filter.p.size() - filter.z.size(), complex<T>(0, -wo)); - result.k = filter.k * real(product(-filter.z) / product(-filter.p)); + result.k = filter.k * kfr::real(product(-filter.z) / product(-filter.p)); return result; } diff --git a/include/kfr/dsp/state_holder.hpp b/include/kfr/dsp/state_holder.hpp @@ -0,0 +1,41 @@ +/** @addtogroup fir + * @{ + */ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ +#pragma once + +#include "../cident.h" + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ +namespace internal +{ + +template <typename T, bool stateless> +struct state_holder +{ + state_holder() = delete; + state_holder(const state_holder&) = default; + state_holder(state_holder&&) = default; + constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {} + T s; +}; + +template <typename T> +struct state_holder<T, true> +{ + state_holder() = delete; + state_holder(const state_holder&) = default; + state_holder(state_holder&&) = default; + constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {} + const T& s; +}; +} // namespace internal +} // namespace CMT_ARCH_NAME +} // namespace kfr diff --git a/include/kfr/math/complex_math.hpp b/include/kfr/math/complex_math.hpp @@ -64,6 +64,12 @@ KFR_INTRINSIC vec<complex<T>, N> ccosh(const vec<complex<T>, N>& x) } template <typename T, size_t N> +KFR_INTRINSIC vec<T, N> cabssqr(const vec<complex<T>, N>& x) +{ + const vec<T, N* 2> xx = sqr(cdecom(x)); + return even(xx) + odd(xx); +} +template <typename T, size_t N> KFR_INTRINSIC vec<T, N> cabs(const vec<complex<T>, N>& x) { const vec<T, N* 2> xx = sqr(cdecom(x)); @@ -158,6 +164,11 @@ KFR_HANDLE_SCALAR(csqrt) KFR_HANDLE_SCALAR(csqr) template <typename T, size_t N> +KFR_INTRINSIC vec<T, N> cabssqr(const vec<T, N>& a) +{ + return to_scalar(intrinsics::cabssqr(static_cast<vec<complex<T>, N>>(a))); +} +template <typename T, size_t N> KFR_INTRINSIC vec<T, N> cabs(const vec<T, N>& a) { return to_scalar(intrinsics::cabs(static_cast<vec<complex<T>, N>>(a))); @@ -168,6 +179,12 @@ KFR_INTRINSIC vec<T, N> carg(const vec<T, N>& a) return to_scalar(intrinsics::carg(static_cast<vec<complex<T>, N>>(a))); } template <typename T1> +KFR_INTRINSIC realtype<T1> cabssqr(const T1& a) +{ + using vecout = vec1<T1>; + return to_scalar(intrinsics::cabssqr(vecout(a))); +} +template <typename T1> KFR_INTRINSIC realtype<T1> cabs(const T1& a) { using vecout = vec1<T1>; @@ -185,6 +202,7 @@ KFR_I_FN(csin) KFR_I_FN(csinh) KFR_I_FN(ccos) KFR_I_FN(ccosh) +KFR_I_FN(cabssqr) KFR_I_FN(cabs) KFR_I_FN(carg) KFR_I_FN(clog) @@ -254,6 +272,21 @@ KFR_FUNCTION internal::expression_function<fn::ccosh, E1> ccosh(E1&& x) return { fn::ccosh(), std::forward<E1>(x) }; } +/// @brief Returns the squared absolute value (magnitude squared) of the complex number x +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>)> +KFR_FUNCTION realtype<T1> cabssqr(const T1& x) +{ + return intrinsics::cabssqr(x); +} + +/// @brief Returns template expression that returns the squared absolute value (magnitude squared) of the +/// complex number x +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> +KFR_FUNCTION internal::expression_function<fn::cabssqr, E1> cabssqr(E1&& x) +{ + return { fn::cabssqr(), std::forward<E1>(x) }; +} + /// @brief Returns the absolute value (magnitude) of the complex number x template <typename T1, KFR_ENABLE_IF(is_numeric<T1>)> KFR_FUNCTION realtype<T1> cabs(const T1& x) diff --git a/include/kfr/simd/complex.hpp b/include/kfr/simd/complex.hpp @@ -60,13 +60,13 @@ struct complex constexpr complex(const complex&) CMT_NOEXCEPT = default; constexpr complex(complex&&) CMT_NOEXCEPT = default; template <typename U> - KFR_MEM_INTRINSIC constexpr complex(const complex<U>& other) CMT_NOEXCEPT : re(static_cast<T>(other.re)), - im(static_cast<T>(other.im)) + KFR_MEM_INTRINSIC constexpr complex(const complex<U>& other) CMT_NOEXCEPT : re(static_cast<T>(other.real())), + im(static_cast<T>(other.imag())) { } template <typename U> - KFR_MEM_INTRINSIC constexpr complex(complex<U>&& other) CMT_NOEXCEPT : re(std::move(other.re)), - im(std::move(other.im)) + KFR_MEM_INTRINSIC constexpr complex(complex<U>&& other) CMT_NOEXCEPT : re(std::move(other.real())), + im(std::move(other.imag())) { } #ifdef CMT_COMPILER_GNU @@ -80,6 +80,7 @@ struct complex KFR_MEM_INTRINSIC constexpr const T& imag() const CMT_NOEXCEPT { return im; } KFR_MEM_INTRINSIC constexpr void real(T value) CMT_NOEXCEPT { re = value; } KFR_MEM_INTRINSIC constexpr void imag(T value) CMT_NOEXCEPT { im = value; } +private: T re; T im; }; diff --git a/tests/base_test.cpp b/tests/base_test.cpp @@ -92,6 +92,18 @@ TEST(test_basic) CHECK(inrange(pack(1, 2, 3), 1, 1) == make_mask<int>(true, false, false)); } +TEST(test_gen_expj) +{ + kfr::univector<cbase> v = kfr::truncate(kfr::gen_expj(0.f, constants<float>::pi_s(2) * 0.1f), 1000); + CHECK(rms(cabs(v.slice(990) - + univector<cbase>({ cbase(1., +0.00000000e+00), cbase(0.80901699, +5.87785252e-01), + cbase(0.30901699, +9.51056516e-01), cbase(-0.30901699, +9.51056516e-01), + cbase(-0.80901699, +5.87785252e-01), cbase(-1., +1.22464680e-16), + cbase(-0.80901699, -5.87785252e-01), + cbase(-0.30901699, -9.51056516e-01), cbase(0.30901699, -9.51056516e-01), + cbase(0.80901699, -5.87785252e-01) }))) < 0.00001); +} + } // namespace CMT_ARCH_NAME #ifndef KFR_NO_MAIN diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp @@ -32,7 +32,17 @@ TEST(test_convolve) CHECK(rms(c - univector<fbase>({ 0.25, 1., 2.75, 2.5, 3.75, 3.5, 1.5, -4., 7.5 })) < 0.0001); } -TEST(test_fft_convolve) +TEST(test_complex_convolve) +{ + univector<complex<fbase>, 5> a({ 1, 2, 3, 4, 5 }); + univector<complex<fbase>, 5> b({ 0.25, 0.5, 1.0, -2.0, 1.5 }); + univector<complex<fbase>> c = convolve(a, b); + CHECK(c.size() == 9u); + CHECK(rms(cabs(c - univector<complex<fbase>>({ 0.25, 1., 2.75, 2.5, 3.75, 3.5, 1.5, -4., 7.5 }))) < + 0.0001); +} + +TEST(test_convolve_filter) { univector<fbase, 5> a({ 1, 2, 3, 4, 5 }); univector<fbase, 5> b({ 0.25, 0.5, 1.0, -2.0, 1.5 }); @@ -42,6 +52,21 @@ TEST(test_fft_convolve) CHECK(rms(dest - univector<fbase>({ 0.25, 1., 2.75, 2.5, 3.75 })) < 0.0001); } +TEST(test_complex_convolve_filter) +{ + univector<complex<fbase>, 5> a({ 1, 2, 3, 4, 5 }); + univector<complex<fbase>, 5> b({ 0.25, 0.5, 1.0, -2.0, 1.5 }); + univector<complex<fbase>, 5> dest; + convolve_filter<complex<fbase>> filter(a); + filter.apply(dest, b); + CHECK(rms(cabs(dest - univector<complex<fbase>>({ 0.25, 1., 2.75, 2.5, 3.75 }))) < 0.0001); + filter.apply(dest, b); + CHECK(rms(cabs(dest - univector<complex<fbase>>({ 0.25, 1., 2.75, 2.5, 3.75 }))) > 0.0001); + filter.reset(); + filter.apply(dest, b); + CHECK(rms(cabs(dest - univector<complex<fbase>>({ 0.25, 1., 2.75, 2.5, 3.75 }))) < 0.0001); +} + TEST(test_correlate) { univector<fbase, 5> a({ 1, 2, 3, 4, 5 }); @@ -51,6 +76,15 @@ TEST(test_correlate) CHECK(rms(c - univector<fbase>({ 1.5, 1., 1.5, 2.5, 3.75, -4., 7.75, 3.5, 1.25 })) < 0.0001); } +TEST(test_complex_correlate) +{ + univector<complex<fbase>, 5> a({ 1, 2, 3, 4, 5 }); + univector<complex<fbase>, 5> b({ 0.25, 0.5, 1.0, -2.0, 1.5 }); + univector<complex<fbase>> c = correlate(a, b); + CHECK(c.size() == 9u); + CHECK(rms(cabs(c - univector<fbase>({ 1.5, 1., 1.5, 2.5, 3.75, -4., 7.75, 3.5, 1.25 }))) < 0.0001); +} + #if defined CMT_ARCH_ARM || !defined NDEBUG constexpr size_t fft_stopsize = 12; constexpr size_t dft_stopsize = 101; diff --git a/tests/dsp_test.cpp b/tests/dsp_test.cpp @@ -287,6 +287,12 @@ TEST(delay) CHECK_EXPRESSION(delay(v1), 33, [](size_t i) { return i < 1 ? 0.f : (i - 1) + 100.f; }); CHECK_EXPRESSION(delay<3>(v1), 33, [](size_t i) { return i < 3 ? 0.f : (i - 3) + 100.f; }); + + delay_state<float, 3> state1; + CHECK_EXPRESSION(delay(state1, v1), 33, [](size_t i) { return i < 3 ? 0.f : (i - 3) + 100.f; }); + + delay_state<float, 3, tag_dynamic_vector> state2; + CHECK_EXPRESSION(delay(state2, v1), 33, [](size_t i) { return i < 3 ? 0.f : (i - 3) + 100.f; }); } TEST(fracdelay) @@ -396,12 +402,46 @@ TEST(fir) return result; }); + fir_state state(taps.ref()); + + CHECK_EXPRESSION(fir(state, data), 100, [&](size_t index) -> T { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0) * taps[i]; + return result; + }); + CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> T { T result = 0; for (size_t i = 0; i < taps.size(); i++) result += data.get(index - i, 0) * taps[i]; return result; }); + + CHECK_EXPRESSION(moving_sum<taps.size()>(data), 100, [&](size_t index) -> T { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0); + return result; + }); + + moving_sum_state<T, 131> msstate1; + + CHECK_EXPRESSION(moving_sum(msstate1, data), 100, [&](size_t index) -> T { + T result = 0; + for (size_t i = 0; i < msstate1.delayline.size(); i++) + result += data.get(index - i, 0); + return result; + }); + + moving_sum_state<T> msstate2(133); + + CHECK_EXPRESSION(moving_sum(msstate2, data), 100, [&](size_t index) -> T { + T result = 0; + for (size_t i = 0; i < msstate2.delayline.size(); i++) + result += data.get(index - i, 0); + return result; + }); }); #endif } @@ -444,7 +484,7 @@ inline std::complex<T> from_std(const std::complex<T>& c) template <typename T> inline std::complex<T> to_std(const kfr::complex<T>& c) { - return { c.re, c.im }; + return { c.real(), c.imag() }; } template <typename T> @@ -584,9 +624,9 @@ TEST(resampler_test) } // namespace CMT_ARCH_NAME #ifndef KFR_NO_MAIN -int main() +int main(int argc, char* argv[]) { println(library_version()); - return testo::run_all("", true); + return testo::run_all(argc > 1 ? argv[1] : "", true); } #endif