kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 72429b0ac086a73e8181cb754677f72f2b77d917
parent 3d229639d43675e8001b9c3b8e69c23bb8a7af06
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Thu, 11 Nov 2021 10:19:17 +0000

Workaround for clang bug

Diffstat:
M CMakeLists.txt | 10 +++-------
M capi/CMakeLists.txt | 6 +++---
M examples/CMakeLists.txt | 2 +-
M tests/CMakeLists.txt | 2 +-
M tests/dft_test.cpp | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M tools/CMakeLists.txt | 2 +-
6 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with KFR. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.12) project(kfr CXX) @@ -131,7 +131,7 @@ if (APPLE) endif () if (NOT IOS) if (CLANG) - target_compile_options(kfr INTERFACE -Xclang -mstackrealign) + target_compile_options(kfr INTERFACE "SHELL:-Xclang -mstackrealign") elseif (NOT MSVC) target_compile_options(kfr INTERFACE -mstackrealign) endif () @@ -180,11 +180,7 @@ endfunction () if (ENABLE_DFT) - if (MSVC) - set(KFR_DFT_DEFS -fp:fast) - else () - set(KFR_DFT_DEFS -ffast-math) - endif () + set(KFR_DFT_DEFS "SHELL:-Xclang -ffp-contract=fast") if (ENABLE_DFT_MULTIARCH) add_library(kfr_dft INTERFACE) diff --git a/capi/CMakeLists.txt b/capi/CMakeLists.txt @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with KFR. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.12) if (WIN32) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) @@ -57,7 +57,7 @@ function (add_c_library ARCH) ${CMAKE_CURRENT_SOURCE_DIR}/../include/kfr/dsp/impl/dsp-impl.cpp) target_link_libraries(kfr_capi_${ARCH} kfr) target_set_arch(kfr_capi_${ARCH} PRIVATE ${ARCH}) - target_compile_options(kfr_capi_${ARCH} PRIVATE -Xclang -ffast-math) + target_compile_options(kfr_capi_${ARCH} PRIVATE "SHELL:-Xclang -ffp-contract=fast") target_link_libraries(kfr_capi_all INTERFACE kfr_capi_${ARCH}) if (NOT WIN32) @@ -69,7 +69,7 @@ function (add_c_library ARCH) PROPERTY POSITION_INDEPENDENT_CODE 1) target_link_libraries(kfr_capi_${ARCH}_pic kfr) target_set_arch(kfr_capi_${ARCH}_pic PRIVATE ${ARCH}) - target_compile_options(kfr_capi_${ARCH}_pic PRIVATE -Xclang -ffast-math) + target_compile_options(kfr_capi_${ARCH}_pic PRIVATE "SHELL:-Xclang -ffp-contract=fast") target_link_libraries(kfr_capi_all_pic INTERFACE kfr_capi_${ARCH}_pic) endif () diff --git a/examples/CMakeLists.txt 
b/examples/CMakeLists.txt @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with KFR. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.12) # Binary output directories set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with KFR. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.12) add_definitions(-DKFR_TESTING=1) diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp @@ -6,6 +6,7 @@ #include <kfr/testo/testo.hpp> +#include <chrono> #include <kfr/base.hpp> #include <kfr/dft.hpp> #include <kfr/dsp.hpp> @@ -23,6 +24,58 @@ constexpr ctypes_t<float, double> dft_float_types{}; constexpr ctypes_t<float> dft_float_types{}; #endif +#ifdef __clang__ + +static void full_barrier() { asm volatile("mfence" ::: "memory"); } +static void dont_optimize(const void* in) { asm volatile("" : "+m"(in)); } + +template <typename T> +static void perf_test_t(int size) +{ + print("[PERFORMANCE] DFT ", fmt<'s', 6>(type_name<T>()), " ", fmt<'d', 6>(size), "..."); + random_bit_generator gen1(2247448713, 915890490, 864203735, 2982561); + random_bit_generator gen2(2982561, 2247448713, 915890490, 864203735); + std::chrono::high_resolution_clock::duration duration(0); + dft_plan<T> dft(size); + univector<u8> tmp(dft.temp_size); + uint64_t counter = 0; + while (duration < std::chrono::seconds(1)) + { + univector<complex<T>> data(size); + data = make_complex(gen_random_range<T>(gen1, -1.0, +1.0), gen_random_range<T>(gen2, -1.0, +1.0)); + full_barrier(); + auto start = std::chrono::high_resolution_clock::now(); + dft.execute(data, data, tmp); + + full_barrier(); + duration += std::chrono::high_resolution_clock::now() - start; + dont_optimize(data.data()); + ++counter; + } + double opspersecond = counter / (std::chrono::nanoseconds(duration).count() / 
1'000'000'000.0); + println(" ", fmt<'f', 12, 1>(opspersecond), " ops/second"); +} + +static void perf_test(int size) +{ + perf_test_t<float>(size); + perf_test_t<double>(size); +} + +TEST(test_performance) +{ + for (int size = 16; size <= 16384; size <<= 1) + { + perf_test(size); + } + + perf_test(210); + perf_test(3150); + perf_test(211); + perf_test(3163); +} +#endif + TEST(test_convolve) { univector<fbase, 5> a({ 1, 2, 3, 4, 5 }); @@ -126,7 +179,8 @@ TEST(fft_accuracy) testo::matrix(named("type") = dft_float_types, // named("size") = sizes, // - [&gen](auto type, size_t size) { + [&gen](auto type, size_t size) + { using float_type = type_of<decltype(type)>; const double min_prec = 0.000001 * std::log(size) * size; diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with KFR. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.12) # Binary output directories set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin)