commit 72429b0ac086a73e8181cb754677f72f2b77d917
parent 3d229639d43675e8001b9c3b8e69c23bb8a7af06
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date: Thu, 11 Nov 2021 10:19:17 +0000
Workaround for clang bug
Diffstat:
6 files changed, 64 insertions(+), 14 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -14,7 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with KFR.
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.12)
project(kfr CXX)
@@ -131,7 +131,7 @@ if (APPLE)
endif ()
if (NOT IOS)
if (CLANG)
- target_compile_options(kfr INTERFACE -Xclang -mstackrealign)
+ target_compile_options(kfr INTERFACE "SHELL:-Xclang -mstackrealign")
elseif (NOT MSVC)
target_compile_options(kfr INTERFACE -mstackrealign)
endif ()
@@ -180,11 +180,7 @@ endfunction ()
if (ENABLE_DFT)
- if (MSVC)
- set(KFR_DFT_DEFS -fp:fast)
- else ()
- set(KFR_DFT_DEFS -ffast-math)
- endif ()
+ set(KFR_DFT_DEFS "SHELL:-Xclang -ffp-contract=fast")
if (ENABLE_DFT_MULTIARCH)
add_library(kfr_dft INTERFACE)
diff --git a/capi/CMakeLists.txt b/capi/CMakeLists.txt
@@ -14,7 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with KFR.
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.12)
if (WIN32)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
@@ -57,7 +57,7 @@ function (add_c_library ARCH)
${CMAKE_CURRENT_SOURCE_DIR}/../include/kfr/dsp/impl/dsp-impl.cpp)
target_link_libraries(kfr_capi_${ARCH} kfr)
target_set_arch(kfr_capi_${ARCH} PRIVATE ${ARCH})
- target_compile_options(kfr_capi_${ARCH} PRIVATE -Xclang -ffast-math)
+ target_compile_options(kfr_capi_${ARCH} PRIVATE "SHELL:-Xclang -ffp-contract=fast")
target_link_libraries(kfr_capi_all INTERFACE kfr_capi_${ARCH})
if (NOT WIN32)
@@ -69,7 +69,7 @@ function (add_c_library ARCH)
PROPERTY POSITION_INDEPENDENT_CODE 1)
target_link_libraries(kfr_capi_${ARCH}_pic kfr)
target_set_arch(kfr_capi_${ARCH}_pic PRIVATE ${ARCH})
- target_compile_options(kfr_capi_${ARCH}_pic PRIVATE -Xclang -ffast-math)
+ target_compile_options(kfr_capi_${ARCH}_pic PRIVATE "SHELL:-Xclang -ffp-contract=fast")
target_link_libraries(kfr_capi_all_pic INTERFACE kfr_capi_${ARCH}_pic)
endif ()
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -14,7 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with KFR.
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.12)
# Binary output directories
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -14,7 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with KFR.
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.12)
add_definitions(-DKFR_TESTING=1)
diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp
@@ -6,6 +6,7 @@
#include <kfr/testo/testo.hpp>
+#include <chrono>
#include <kfr/base.hpp>
#include <kfr/dft.hpp>
#include <kfr/dsp.hpp>
@@ -23,6 +24,58 @@ constexpr ctypes_t<float, double> dft_float_types{};
constexpr ctypes_t<float> dft_float_types{};
#endif
+#ifdef __clang__
+
+// Full memory barrier placed around the timed region so that memory accesses are not
+// moved across the clock reads.
+// `__clang__` is also defined when targeting non-x86 CPUs, where the `mfence`
+// instruction does not exist, so the raw instruction is only used on x86 and the
+// compiler's sequentially-consistent fence builtin is used everywhere else.
+static void full_barrier()
+{
+#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
+    asm volatile("mfence" ::: "memory");
+#else
+    asm volatile("" ::: "memory"); // compiler-level barrier
+    __atomic_thread_fence(__ATOMIC_SEQ_CST); // hardware-level barrier
+#endif
+}
+// Makes the buffer behind `in` observable to the optimizer so that the computation
+// which filled it cannot be removed as dead code.
+// The pointer is passed as an input operand and "memory" is clobbered: this tells the
+// compiler the asm may read anything reachable through `in`. The former "+m"(in)
+// operand only covered the local pointer variable itself, not the pointed-to data.
+static void dont_optimize(const void* in) { asm volatile("" : : "r"(in) : "memory"); }
+
+// Benchmarks dft_plan<T>::execute for a transform of `size` points.
+// The transform is repeated on freshly generated random input until at least one
+// second of execution time has been accumulated; only the execute() call (bracketed
+// by full barriers) is timed. The resulting rate is printed as operations per second.
+template <typename T>
+static void perf_test_t(int size)
+{
+    using clock_type = std::chrono::high_resolution_clock;
+
+    print("[PERFORMANCE] DFT ", fmt<'s', 6>(type_name<T>()), " ", fmt<'d', 6>(size), "...");
+    random_bit_generator gen1(2247448713, 915890490, 864203735, 2982561);
+    random_bit_generator gen2(2982561, 2247448713, 915890490, 864203735);
+    clock_type::duration elapsed(0);
+    dft_plan<T> dft(size);
+    univector<u8> scratch(dft.temp_size);
+    uint64_t iterations = 0;
+    for (; elapsed < std::chrono::seconds(1); ++iterations)
+    {
+        // Input generation is deliberately kept outside the timed region.
+        univector<complex<T>> data(size);
+        data = make_complex(gen_random_range<T>(gen1, -1.0, +1.0), gen_random_range<T>(gen2, -1.0, +1.0));
+        full_barrier();
+        const auto t0 = std::chrono::high_resolution_clock::now();
+        dft.execute(data, data, scratch);
+
+        full_barrier();
+        const auto t1 = std::chrono::high_resolution_clock::now();
+        elapsed += t1 - t0;
+        // Keep the transform result alive so the call above is not optimized away.
+        dont_optimize(data.data());
+    }
+    const double seconds = std::chrono::nanoseconds(elapsed).count() / 1'000'000'000.0;
+    double opspersecond  = iterations / seconds;
+    println(" ", fmt<'f', 12, 1>(opspersecond), " ops/second");
+}
+
+// Runs the DFT benchmark for one transform size, first in single precision and then
+// in double precision.
+static void perf_test(const int size)
+{
+    perf_test_t<float>(size); // single precision
+    perf_test_t<double>(size); // double precision
+}
+
+// Benchmarks the DFT over every power-of-two size from 16 to 16384, followed by two
+// highly composite sizes (210, 3150) and two prime sizes (211, 3163).
+TEST(test_performance)
+{
+    int pow2_size = 16;
+    while (pow2_size <= 16384)
+    {
+        perf_test(pow2_size);
+        pow2_size *= 2;
+    }
+
+    const int other_sizes[] = { 210, 3150, 211, 3163 };
+    for (const int other_size : other_sizes)
+    {
+        perf_test(other_size);
+    }
+}
+#endif
+
TEST(test_convolve)
{
univector<fbase, 5> a({ 1, 2, 3, 4, 5 });
@@ -126,7 +179,8 @@ TEST(fft_accuracy)
testo::matrix(named("type") = dft_float_types, //
named("size") = sizes, //
- [&gen](auto type, size_t size) {
+ [&gen](auto type, size_t size)
+ {
using float_type = type_of<decltype(type)>;
const double min_prec = 0.000001 * std::log(size) * size;
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
@@ -14,7 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with KFR.
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.12)
# Binary output directories
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin)