commit a12bf85a47d39d388bfcdc827853cd94e8a79b20
parent 9070461d316045a0e93bf2da8d99a7a7ae1b34b7
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date: Wed, 27 Nov 2019 10:51:22 +0000
C API: Detect CPU at runtime
Diffstat:
5 files changed, 58 insertions(+), 10 deletions(-)
diff --git a/capi/capi.cpp b/capi/capi.cpp
@@ -289,39 +289,41 @@ extern "C"
KFR_FILTER_F32* kfr_filter_create_fir_plan_f32(const kfr_f32* taps, size_t size)
{
- return reinterpret_cast<KFR_FILTER_F32*>(make_fir_filter<float>(make_univector(taps, size)));
+ return reinterpret_cast<KFR_FILTER_F32*>(
+ make_fir_filter<float>(cpu_t::runtime, make_univector(taps, size)));
}
KFR_FILTER_F64* kfr_filter_create_fir_plan_f64(const kfr_f64* taps, size_t size)
{
- return reinterpret_cast<KFR_FILTER_F64*>(make_fir_filter<double>(make_univector(taps, size)));
+ return reinterpret_cast<KFR_FILTER_F64*>(
+ make_fir_filter<double>(cpu_t::runtime, make_univector(taps, size)));
}
KFR_FILTER_F32* kfr_filter_create_convolution_plan_f32(const kfr_f32* taps, size_t size,
size_t block_size)
{
- return reinterpret_cast<KFR_FILTER_F32*>(
- make_convolve_filter<float>(make_univector(taps, size), block_size ? block_size : 1024));
+ return reinterpret_cast<KFR_FILTER_F32*>(make_convolve_filter<float>(
+ cpu_t::runtime, make_univector(taps, size), block_size ? block_size : 1024));
}
KFR_FILTER_F64* kfr_filter_create_convolution_plan_f64(const kfr_f64* taps, size_t size,
size_t block_size)
{
- return reinterpret_cast<KFR_FILTER_F64*>(
- make_convolve_filter<double>(make_univector(taps, size), block_size ? block_size : 1024));
+ return reinterpret_cast<KFR_FILTER_F64*>(make_convolve_filter<double>(
+ cpu_t::runtime, make_univector(taps, size), block_size ? block_size : 1024));
}
KFR_FILTER_F32* kfr_filter_create_iir_plan_f32(const kfr_f32* sos, size_t sos_count)
{
if (sos_count < 1 || sos_count > 64)
return nullptr;
- return reinterpret_cast<KFR_FILTER_F32*>(
- make_biquad_filter<float, 64>(reinterpret_cast<const biquad_params<float>*>(sos), sos_count));
+ return reinterpret_cast<KFR_FILTER_F32*>(make_biquad_filter<float, 64>(
+ cpu_t::runtime, reinterpret_cast<const biquad_params<float>*>(sos), sos_count));
}
KFR_FILTER_F64* kfr_filter_create_iir_plan_f64(const kfr_f64* sos, size_t sos_count)
{
if (sos_count < 1 || sos_count > 64)
return nullptr;
- return reinterpret_cast<KFR_FILTER_F64*>(
- make_biquad_filter<double, 64>(reinterpret_cast<const biquad_params<double>*>(sos), sos_count));
+ return reinterpret_cast<KFR_FILTER_F64*>(make_biquad_filter<double, 64>(
+ cpu_t::runtime, reinterpret_cast<const biquad_params<double>*>(sos), sos_count));
}
void kfr_filter_process_f32(KFR_FILTER_F32* plan, kfr_f32* output, const kfr_f32* input, size_t size)
diff --git a/include/kfr/cident.h b/include/kfr/cident.h
@@ -736,6 +736,29 @@ extern char* gets(char* __s);
#endif
#ifdef CMT_MULTI
+
+#define CMT_MULTI_PROTO_GATE(...) \
+ if (cpu == cpu_t::runtime) \
+ cpu = get_cpu(); \
+ switch (cpu) \
+ { \
+ case cpu_t::avx512: \
+ CMT_IF_ENABLED_AVX512(return avx512::__VA_ARGS__;) \
+ case cpu_t::avx2: \
+ CMT_IF_ENABLED_AVX2(return avx2::__VA_ARGS__;) \
+ case cpu_t::avx: \
+ CMT_IF_ENABLED_AVX(return avx::__VA_ARGS__;) \
+ case cpu_t::sse41: \
+ CMT_IF_ENABLED_SSE41(return sse41::__VA_ARGS__;) \
+ case cpu_t::ssse3: \
+ CMT_IF_ENABLED_SSSE3(return ssse3::__VA_ARGS__;) \
+ case cpu_t::sse3: \
+ CMT_IF_ENABLED_SSE3(return sse3::__VA_ARGS__;) \
+ case cpu_t::sse2: \
+ CMT_IF_ENABLED_SSE2(return sse2::__VA_ARGS__;) \
+ default: \
+ return {}; \
+ }
#define CMT_MULTI_PROTO(...) \
inline namespace CMT_ARCH_NAME \
{ \
diff --git a/include/kfr/dft/convolution.hpp b/include/kfr/dft/convolution.hpp
@@ -112,5 +112,12 @@ protected:
CMT_MULTI_PROTO(template <typename T>
filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block_size);)
+#ifdef CMT_MULTI
+template <typename T>
+KFR_FUNCTION filter<T>* make_convolve_filter(cpu_t cpu, const univector_ref<const T>& taps, size_t block_size)
+{
+ CMT_MULTI_PROTO_GATE(make_convolve_filter<T>(taps, block_size))
+}
+#endif
} // namespace kfr
CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp
@@ -346,4 +346,12 @@ public:
CMT_MULTI_PROTO(template <typename T, size_t maxfiltercount>
filter<T>* make_biquad_filter(const biquad_params<T>* bq, size_t count);)
+
+#ifdef CMT_MULTI
+template <typename T, size_t maxfiltercount>
+KFR_FUNCTION filter<T>* make_biquad_filter(cpu_t cpu, const biquad_params<T>* bq, size_t count)
+{
+ CMT_MULTI_PROTO_GATE(make_biquad_filter<T, maxfiltercount>(bq, count))
+}
+#endif
} // namespace kfr
diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp
@@ -228,4 +228,12 @@ using filter_fir = fir_filter<T, U>;
CMT_MULTI_PROTO(template <typename U, typename T>
filter<U>* make_fir_filter(const univector_ref<const T>& taps);)
+
+#ifdef CMT_MULTI
+template <typename U, typename T>
+KFR_FUNCTION filter<U>* make_fir_filter(cpu_t cpu, const univector_ref<const T>& taps)
+{
+ CMT_MULTI_PROTO_GATE(make_fir_filter<U>(taps))
+}
+#endif
} // namespace kfr