kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit a12bf85a47d39d388bfcdc827853cd94e8a79b20
parent 9070461d316045a0e93bf2da8d99a7a7ae1b34b7
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Wed, 27 Nov 2019 10:51:22 +0000

C API: Detect cpu at runtime

Diffstat:
M capi/capi.cpp | 22 ++++++++++++----------
M include/kfr/cident.h | 23 +++++++++++++++++++++++
M include/kfr/dft/convolution.hpp | 7 +++++++
M include/kfr/dsp/biquad.hpp | 8 ++++++++
M include/kfr/dsp/fir.hpp | 8 ++++++++
5 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/capi/capi.cpp b/capi/capi.cpp @@ -289,39 +289,41 @@ extern "C" KFR_FILTER_F32* kfr_filter_create_fir_plan_f32(const kfr_f32* taps, size_t size) { - return reinterpret_cast<KFR_FILTER_F32*>(make_fir_filter<float>(make_univector(taps, size))); + return reinterpret_cast<KFR_FILTER_F32*>( + make_fir_filter<float>(cpu_t::runtime, make_univector(taps, size))); } KFR_FILTER_F64* kfr_filter_create_fir_plan_f64(const kfr_f64* taps, size_t size) { - return reinterpret_cast<KFR_FILTER_F64*>(make_fir_filter<double>(make_univector(taps, size))); + return reinterpret_cast<KFR_FILTER_F64*>( + make_fir_filter<double>(cpu_t::runtime, make_univector(taps, size))); } KFR_FILTER_F32* kfr_filter_create_convolution_plan_f32(const kfr_f32* taps, size_t size, size_t block_size) { - return reinterpret_cast<KFR_FILTER_F32*>( - make_convolve_filter<float>(make_univector(taps, size), block_size ? block_size : 1024)); + return reinterpret_cast<KFR_FILTER_F32*>(make_convolve_filter<float>( + cpu_t::runtime, make_univector(taps, size), block_size ? block_size : 1024)); } KFR_FILTER_F64* kfr_filter_create_convolution_plan_f64(const kfr_f64* taps, size_t size, size_t block_size) { - return reinterpret_cast<KFR_FILTER_F64*>( - make_convolve_filter<double>(make_univector(taps, size), block_size ? block_size : 1024)); + return reinterpret_cast<KFR_FILTER_F64*>(make_convolve_filter<double>( + cpu_t::runtime, make_univector(taps, size), block_size ? 
block_size : 1024)); } KFR_FILTER_F32* kfr_filter_create_iir_plan_f32(const kfr_f32* sos, size_t sos_count) { if (sos_count < 1 || sos_count > 64) return nullptr; - return reinterpret_cast<KFR_FILTER_F32*>( - make_biquad_filter<float, 64>(reinterpret_cast<const biquad_params<float>*>(sos), sos_count)); + return reinterpret_cast<KFR_FILTER_F32*>(make_biquad_filter<float, 64>( + cpu_t::runtime, reinterpret_cast<const biquad_params<float>*>(sos), sos_count)); } KFR_FILTER_F64* kfr_filter_create_iir_plan_f64(const kfr_f64* sos, size_t sos_count) { if (sos_count < 1 || sos_count > 64) return nullptr; - return reinterpret_cast<KFR_FILTER_F64*>( - make_biquad_filter<double, 64>(reinterpret_cast<const biquad_params<double>*>(sos), sos_count)); + return reinterpret_cast<KFR_FILTER_F64*>(make_biquad_filter<double, 64>( + cpu_t::runtime, reinterpret_cast<const biquad_params<double>*>(sos), sos_count)); } void kfr_filter_process_f32(KFR_FILTER_F32* plan, kfr_f32* output, const kfr_f32* input, size_t size) diff --git a/include/kfr/cident.h b/include/kfr/cident.h @@ -736,6 +736,29 @@ extern char* gets(char* __s); #endif #ifdef CMT_MULTI + +#define CMT_MULTI_PROTO_GATE(...) \ + if (cpu == cpu_t::runtime) \ + cpu = get_cpu(); \ + switch (cpu) \ + { \ + case cpu_t::avx512: \ + CMT_IF_ENABLED_AVX512(return avx512::__VA_ARGS__;) \ + case cpu_t::avx2: \ + CMT_IF_ENABLED_AVX2(return avx2::__VA_ARGS__;) \ + case cpu_t::avx: \ + CMT_IF_ENABLED_AVX(return avx::__VA_ARGS__;) \ + case cpu_t::sse41: \ + CMT_IF_ENABLED_SSE41(return sse41::__VA_ARGS__;) \ + case cpu_t::ssse3: \ + CMT_IF_ENABLED_SSSE3(return ssse3::__VA_ARGS__;) \ + case cpu_t::sse3: \ + CMT_IF_ENABLED_SSE3(return sse3::__VA_ARGS__;) \ + case cpu_t::sse2: \ + CMT_IF_ENABLED_SSE2(return sse2::__VA_ARGS__;) \ + default: \ + return {}; \ + } #define CMT_MULTI_PROTO(...) 
\ inline namespace CMT_ARCH_NAME \ { \ diff --git a/include/kfr/dft/convolution.hpp b/include/kfr/dft/convolution.hpp @@ -112,5 +112,12 @@ protected: CMT_MULTI_PROTO(template <typename T> filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block_size);) +#ifdef CMT_MULTI +template <typename T> +KFR_FUNCTION filter<T>* make_convolve_filter(cpu_t cpu, const univector_ref<const T>& taps, size_t block_size) +{ + CMT_MULTI_PROTO_GATE(make_convolve_filter<T>(taps, block_size)) +} +#endif } // namespace kfr CMT_PRAGMA_GNU(GCC diagnostic pop) diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp @@ -346,4 +346,12 @@ public: CMT_MULTI_PROTO(template <typename T, size_t maxfiltercount> filter<T>* make_biquad_filter(const biquad_params<T>* bq, size_t count);) + +#ifdef CMT_MULTI +template <typename T, size_t maxfiltercount> +KFR_FUNCTION filter<T>* make_biquad_filter(cpu_t cpu, const biquad_params<T>* bq, size_t count) +{ + CMT_MULTI_PROTO_GATE(make_biquad_filter<T, maxfiltercount>(bq, count)) +} +#endif } // namespace kfr diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp @@ -228,4 +228,12 @@ using filter_fir = fir_filter<T, U>; CMT_MULTI_PROTO(template <typename U, typename T> filter<U>* make_fir_filter(const univector_ref<const T>& taps);) + +#ifdef CMT_MULTI +template <typename U, typename T> +KFR_FUNCTION filter<U>* make_fir_filter(cpu_t cpu, const univector_ref<const T>& taps) +{ + CMT_MULTI_PROTO_GATE(make_fir_filter<U>(taps)) +} +#endif } // namespace kfr